Example #2
import base64
import html
import re

import requests

from google_language import GoogleLanguage
# ENTITY_TYPES is assumed to be exported by the same helper module as
# GoogleLanguage; adjust this import if it lives elsewhere in the project.
from google_language import ENTITY_TYPES


class TweetProcessor(object):
    def __init__(self):
        self.base_url = "https://api.twitter.com/"

        access_token = self._authorize_twitter()
        self.query_headers = {
            "Authorization": "Bearer {}".format(access_token)
        }

        self.google_lang = GoogleLanguage()

    def _authorize_twitter(self):
        key_secret = "{}:{}".format(
            '5gjEh1uYck534ZkyFaBEUrQJc',
            'yfb6hIVc8s5LRFSUiex3Rro1vXaafMpreu08OYJ0oHFHhBHSQG').encode(
                "ascii")
        b64_encoded_key = base64.b64encode(key_secret).decode("ascii")

        auth_url = "{}oauth2/token".format(self.base_url)
        auth_headers = {
            "Authorization": "Basic {}".format(b64_encoded_key),
            "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8"
        }
        auth_data = {"grant_type": "client_credentials"}
        auth_resp = requests.post(auth_url,
                                  headers=auth_headers,
                                  data=auth_data)
        assert auth_resp.status_code == 200
        return auth_resp.json()["access_token"]

    def extract_entities(self, tweet):
        text = tweet["full_text"]
        # unescape html text
        text = html.unescape(text)
        # remove links
        text = re.sub(r"(https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%)*\b", "", text)
        # remove hashtags
        text = re.sub(r"#[A-Za-z]+", "", text)
        # remove irrelevant characters ("-" is escaped so it is matched
        # literally rather than forming a character range)
        text = re.sub(r"[^a-zA-Z0-9.,?!/$&\"': \-_\n\s]", "", text)
        # remove repeated whitespaces
        text = re.sub(r"\s{2,}", " ", text)

        print("Text: {}".format(text))

        # disabled branch: document-level sentiment scoring, kept for reference
        if False:
            sentiment = self.google_lang.get_sentiment(text)
            print("Sentiment: {}, {}".format(sentiment.score,
                                             sentiment.magnitude))

        # disabled branch: per-entity sentiment scoring, kept for reference
        if False:
            entities = self.google_lang.get_entities_sentiment(text)
            for entity in entities:
                print("Entity: {}".format(entity.name))
                print("Sentiment: {}".format(entity.sentiment.score,
                                             entity.sentiment.magnitude))

        entities = self.google_lang.get_entities(text)
        for entity in entities:
            print("Entity: {}".format(entity.name))
            print("Type: {}".format(ENTITY_TYPES[entity.type]))
            print("Salience: {}".format(entity.salience))

        return entities

    def get_tweet(self, tweet_id):
        query_params = {"id": tweet_id, "tweet_mode": "extended"}
        search_url = "{}1.1/statuses/show.json".format(self.base_url)
        search_resp = requests.get(search_url,
                                   headers=self.query_headers,
                                   params=query_params)
        tweet_data = search_resp.json()
        return tweet_data
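
A minimal usage sketch (not part of the original example); it assumes the hard-coded Twitter credentials above are still valid, and the tweet id below is purely illustrative.

if __name__ == "__main__":
    processor = TweetProcessor()
    # placeholder tweet id, for illustration only
    tweet = processor.get_tweet("1234567890")
    processor.extract_entities(tweet)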
Example #3
import requests

from google_language import GoogleLanguage
from google_language import REALLY_IMP_ENTITY_IDX

from news_utils import pretty_print_news

google_lang = GoogleLanguage()


def get_relevant_news(tweet, tweet_entities, news_articles, threshold):
    relevant_news_articles = []

    for item in news_articles:
        # relevance_score_google is not shown in this snippet; it is expected to
        # score how relevant the article text is to the tweet and its entities
        relevance_score = relevance_score_google(
            tweet, tweet_entities, item["title"] + ". " + item["description"])
        item["relevance_score"] = relevance_score
        if relevance_score >= threshold:
            relevant_news_articles.append(item)

    relevant_news_articles.sort(key=lambda x: x["relevance_score"],
                                reverse=True)

    final_articles = []
    sources_covered = []
    for item in relevant_news_articles:
        if item["source"]["id"] not in sources_covered:
            final_articles.append(item)
            sources_covered.append(item["source"]["id"])

    for item in final_articles[:3]:
        # the original snippet is truncated here; printing the top articles with
        # the imported pretty_print_news helper is the assumed intent
        pretty_print_news(item)
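
For reference, a sketch of the article dictionary shape get_relevant_news expects, inferred from the keys it reads (it matches the NewsAPI article format used in Example #4); all values are placeholders.

sample_article = {
    "source": {"id": "example-source"},   # used to keep one article per source
    "title": "Example headline",
    "description": "Example description text.",
}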
Example #4
import flask
from flask import request, jsonify
import sqlite3
from newsapi import NewsApiClient
import requests
from google_language import GoogleLanguage
from read_json import body

# Init
newsapi = NewsApiClient(api_key='1f7bbd54c6ef4567a64a3424490e831d')

list_of_entities = (GoogleLanguage().get_entities(body))
string_of_entities = ', '.join(list_of_entities)

app = flask.Flask(__name__)
app.config["DEBUG"] = True

# def dict_factory(cursor, row):
#     d = {}
#     for idx, col in enumerate(cursor.description):
#         d[col[0]] = row[idx]
#     return d

# @app.route('/', methods=['GET'])
# def home():
#     return '''<h1>Distant Reading Archive</h1>
# <p>A prototype API for distant reading of science fiction novels.</p>'''


@app.route('/', methods=['GET'])
def api_all():
    # the original snippet ends here; the body below is a minimal placeholder
    # (an assumption, not the original behaviour) so the route returns the
    # entities extracted above as JSON
    return jsonify({"entities": string_of_entities})
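
The snippet never reaches a server start; assuming the usual pattern, a standard Flask entry point such as the one below would run the app (debug mode is already enabled via app.config above).

if __name__ == "__main__":
    app.run()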
Example #5
from google_language import GoogleLanguage


def get_tweet_sentiment(tweet):
    google_lang = GoogleLanguage()
    tweet_sentiment_score = google_lang.get_document_sentiment(tweet).score
    return tweet_sentiment_score
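
A one-line usage sketch, assuming get_document_sentiment accepts raw tweet text (the sample string is illustrative):

print(get_tweet_sentiment("The new release looks fantastic!"))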