def __init__(self):
    """Set up the Twitter API base URL, the bearer-token header and the language client.

    Calls ``self._authorize_twitter()`` to obtain the access token that is
    embedded in ``self.query_headers``, then instantiates ``GoogleLanguage``.
    """
    # Assigned before authorizing, in case the token request derives its
    # endpoint from base_url.
    self.base_url = "https://api.twitter.com/"

    bearer_token = self._authorize_twitter()
    authorization_value = "Bearer {}".format(bearer_token)
    self.query_headers = {"Authorization": authorization_value}

    self.google_lang = GoogleLanguage()
class TweetProcessor(object):
    """Fetch tweets from the Twitter REST API and extract entities from their text.

    On construction the processor requests an application-only bearer token
    from Twitter and creates a ``GoogleLanguage`` client that is used for
    entity extraction.
    """

    def __init__(self):
        """Obtain a bearer token and build the headers used for API queries."""
        # Must be set before authorizing: _authorize_twitter builds its URL
        # from base_url.
        self.base_url = "https://api.twitter.com/"
        access_token = self._authorize_twitter()
        self.query_headers = {
            "Authorization": "Bearer {}".format(access_token)
        }
        self.google_lang = GoogleLanguage()

    def _authorize_twitter(self):
        """Request an OAuth2 bearer token using the client-credentials grant.

        The consumer key/secret are read from the ``TWITTER_CONSUMER_KEY`` and
        ``TWITTER_CONSUMER_SECRET`` environment variables when set; otherwise
        the values that were previously embedded in this file are used.

        Returns:
            The ``access_token`` field of the JSON token response.

        Raises:
            RuntimeError: if the token endpoint does not answer with HTTP 200.
                (This used to be an ``assert``, which is skipped under
                ``python -O``.)
        """
        import os

        # SECURITY NOTE(review): credentials committed to source control should
        # be considered leaked. The literals remain only as a backward-
        # compatible fallback; rotate them and supply the new ones through the
        # environment.
        consumer_key = os.environ.get(
            "TWITTER_CONSUMER_KEY", '5gjEh1uYck534ZkyFaBEUrQJc')
        consumer_secret = os.environ.get(
            "TWITTER_CONSUMER_SECRET",
            'yfb6hIVc8s5LRFSUiex3Rro1vXaafMpreu08OYJ0oHFHhBHSQG')

        key_secret = "{}:{}".format(consumer_key, consumer_secret).encode("ascii")
        b64_encoded_key = base64.b64encode(key_secret).decode("ascii")

        auth_url = "{}oauth2/token".format(self.base_url)
        auth_headers = {
            "Authorization": "Basic {}".format(b64_encoded_key),
            "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8"
        }
        auth_data = {"grant_type": "client_credentials"}

        auth_resp = requests.post(auth_url, headers=auth_headers, data=auth_data)
        if auth_resp.status_code != 200:
            raise RuntimeError(
                "Twitter OAuth2 token request failed with HTTP status {}".format(
                    auth_resp.status_code))
        return auth_resp.json()["access_token"]

    @staticmethod
    def _clean_text(text):
        """Normalize raw tweet text before it is sent for entity analysis.

        Steps, in order: unescape HTML entities, drop links, drop alphabetic
        hashtags, drop every character outside the allowed set, and collapse
        runs of whitespace into a single space.
        """
        # unescape html text
        text = html.unescape(text)
        # remove links
        text = re.sub(r"(https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%)*\b", "", text)
        # remove hashtags
        text = re.sub(r"#[A-Za-z]+", "", text)
        # remove irrelevant characters; the hyphen is escaped so it is a
        # literal "-" and not a " -_" range, which previously let characters
        # such as ( ) @ % < > slip through
        text = re.sub(r"[^a-zA-Z0-9.,?!/$&\"': \-_\n\s]", "", text)
        # remove repeated whitespaces
        text = re.sub(r"\s{2,}", " ", text)
        return text

    def extract_entities(self, tweet):
        """Clean a tweet's text and return the entities found in it.

        Args:
            tweet: mapping with a ``"full_text"`` key holding the tweet text.

        Returns:
            Whatever ``self.google_lang.get_entities`` returns for the cleaned
            text. Each element is expected to expose ``name``, ``type`` and
            ``salience``, which are printed for inspection.
        """
        text = self._clean_text(tweet["full_text"])
        print("Text: {}".format(text))

        entities = self.google_lang.get_entities(text)
        for entity in entities:
            print("Entity: {}".format(entity.name))
            print("Type: {}".format(ENTITY_TYPES[entity.type]))
            print("Salience: {}".format(entity.salience))
        return entities

    def get_tweet(self, tweet_id):
        """Fetch a single tweet in extended mode and return the parsed JSON.

        The HTTP status is not inspected here; the decoded response body is
        returned as-is.
        """
        query_params = {"id": tweet_id, "tweet_mode": "extended"}
        search_url = "{}1.1/statuses/show.json".format(self.base_url)
        search_resp = requests.get(search_url,
                                   headers=self.query_headers,
                                   params=query_params)
        tweet_data = search_resp.json()
        return tweet_data
import requests from google_language import GoogleLanguage from google_language import REALLY_IMP_ENTITY_IDX from news_utils import pretty_print_news google_lang = GoogleLanguage() def get_relevant_news(tweet, tweet_entities, news_articles, threshold): relevant_news_articles = [] for item in news_articles: relevance_score = relevance_score_google( tweet, tweet_entities, item["title"] + ". " + item["description"]) item["relevance_score"] = relevance_score if relevance_score >= threshold: relevant_news_articles.append(item) relevant_news_articles.sort(key=lambda x: x["relevance_score"], reverse=True) final_articles = [] sources_covered = [] for item in relevant_news_articles: if item["source"]["id"] not in sources_covered: final_articles.append(item) sources_covered.append(item["source"]["id"]) for item in final_articles[:3]:
import flask from flask import request, jsonify import sqlite3 from newsapi import NewsApiClient import requests from google_language import GoogleLanguage from read_json import body # Init newsapi = NewsApiClient(api_key='1f7bbd54c6ef4567a64a3424490e831d') list_of_entities = (GoogleLanguage().get_entities(body)) string_of_entities = ', '.join(list_of_entities) app = flask.Flask(__name__) app.config["DEBUG"] = True # def dict_factory(cursor, row): # d = {} # for idx, col in enumerate(cursor.description): # d[col[0]] = row[idx] # return d # @app.route('/', methods=['GET']) # def home(): # return '''<h1>Distant Reading Archive</h1> # <p>A prototype API for distant reading of science fiction novels.</p>''' @app.route('/', methods=['GET']) def api_all():
def get_tweet_sentiment(tweet):
    """Return the document-level sentiment score for ``tweet``.

    A new ``GoogleLanguage`` client is created on every call; the ``score``
    attribute of its ``get_document_sentiment(tweet)`` result is returned.
    """
    return GoogleLanguage().get_document_sentiment(tweet).score