from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.twitter import Twitter

def standardize():
    text = "This chapter is divided into sections that skip between two quite different styles. In the computing with language sections we will take on some linguistically motivated programming tasks without necessarily explaining how they work. In the closer look at Python sections we will systematically review key programming concepts. We'll flag the two styles in the section titles, but later chapters will mix both styles without being so up-front about it. We hope this style of introduction gives you an authentic taste of what will come later, while covering a range of elementary concepts in linguistics and computer science. If you have basic familiarity with both areas, you can skip to 5; we will repeat any important points in later chapters, and if you miss anything you can easily consult the online reference material at http://nltk.org/. If the material is completely new to you, this chapter will raise more questions than it answers, questions that are addressed in the rest of this book."
    # Remove English stopwords from the tokenized text.
    clean_tokens = word_tokenize(text)[:]
    stop_words = set(stopwords.words('english'))
    for token in word_tokenize(text):
        if token in stop_words:
            clean_tokens.remove(token)
    # Join the remaining tokens into a comma-separated keyword string.
    txt = ' , '.join(clean_tokens)
    print(txt)
    # Search past tweets that match the keyword string.
    tw = Twitter()
    tw.tweets(keywords=txt, stream=False, limit=10)
def get_live_twitter_data():
    tw = Twitter()
    # Search past tweets via the REST API and write them to a file.
    tw.tweets(keywords='flu, health, illness, hospital', stream=False,
              limit=5, to_screen=False)

# Alternative: sample from the public stream instead of searching.
# def get_live_twitter_data():
#     oauth = credsfromfile()
#     client = Streamer(**oauth)
#     client.register(TweetWriter(limit=20))
#     client.filter(track='have a fever, flu')
import json
import numpy as np
from tqdm import tqdm
from nltk.twitter import Query, credsfromfile

def scrape_twitter(google_client):
    """Search for JetBlue tweets and score each one with the Google Cloud
    Natural Language client passed in as ``google_client``."""
    oauth = credsfromfile()
    client = Query(**oauth)
    tweets = client.search_tweets(
        keywords='JetBlue OR #JetBlue -filter:retweets', limit=10000)

    topics_dict = {"tweet_texts": [],
                   "ent_score": [],
                   "ent_magn": [],
                   "overall_score": [],
                   "overall_magn": []}

    for tweet in tqdm(tweets):
        topics_dict["tweet_texts"].append(tweet['text'])
        ent_score, ent_magnitude, doc_score, doc_magnitude = analyze_text(
            google_client, text=tweet['text'])
        topics_dict["ent_score"].append(ent_score)
        topics_dict["ent_magn"].append(ent_magnitude)
        topics_dict["overall_score"].append(doc_score)
        topics_dict["overall_magn"].append(doc_magnitude)

    print('Total Count:', len(topics_dict["tweet_texts"]))
    metrics = ["ent_score", "ent_magn", "overall_score", "overall_magn"]
    for metric in metrics:
        metric_score = np.asarray(topics_dict[metric])
        print(metric, "Mean:", np.mean(metric_score),
              "St Dev:", np.std(metric_score))

    with open('./csvs/twitter-jetblue-sentiment.json', 'w') as fp:
        json.dump(topics_dict, fp)
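# ``analyze_text`` is an external helper that is not defined in this snippet.
# The sketch below is one plausible implementation, assuming the
# google-cloud-language client library (``google.cloud.language_v1``) and its
# request-dict call style; the author's actual helper may differ.
from google.cloud import language_v1

def analyze_text(google_client, text):
    """Return (entity score, entity magnitude, document score, document magnitude)."""
    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    entity_response = google_client.analyze_entity_sentiment(
        request={'document': document})
    doc_response = google_client.analyze_sentiment(
        request={'document': document})
    # Average the per-entity sentiment; fall back to zeros if no entities were found.
    ent_scores = [e.sentiment.score for e in entity_response.entities] or [0.0]
    ent_magns = [e.sentiment.magnitude for e in entity_response.entities] or [0.0]
    return (sum(ent_scores) / len(ent_scores),
            sum(ent_magns) / len(ent_magns),
            doc_response.document_sentiment.score,
            doc_response.document_sentiment.magnitude)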
def __init__(self, keywords, to_screen=True, follow=None, limit=10):
    self.follow = follow
    self.keywords = keywords
    # Twitter.tweets is an instance method, so instantiate the client first.
    tw = Twitter()
    tw.tweets(to_screen=to_screen, follow=follow, keywords=keywords, limit=limit)
def twitterclass_demo():
    """
    Use the simplified :class:`Twitter` class to write some tweets to a file.
    """
    tw = Twitter()
    print("Track from the public stream\n")
    tw.tweets(keywords='love, hate', limit=10)  # public stream
    print(SPACER)
    print("Search past Tweets\n")
    tw = Twitter()
    tw.tweets(keywords='love, hate', stream=False, limit=10)  # search past tweets
    print(SPACER)
    print("Follow two accounts in the public stream"
          " -- be prepared to wait a few minutes\n")
    tw = Twitter()
    tw.tweets(follow=['759251', '6017542'], stream=True, limit=5)  # public stream
from nltk.twitter import Twitter

i = 0
dir = "/Users/oskarzhang/twitter-files/"
# sent_tokenizer = nltk.RegexpTokenizer("")
# while(i < 1000):
i += 1
tw = Twitter()
tw.tweets(limit=1000)
import nltk, pprint
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import seaborn as sns

# REST API
tw = Twitter()
# tw.tweets(keywords='LokSabhaElection2019', limit=2)
tw.tweets(keywords='LokSabhaElection2019', stream=False, limit=20)

# Read tweets
totaltweets = 0
oauth = credsfromfile()
client = Query(**oauth)
f = open('E:/temp/twitter.txt', 'w')
tweets = client.search_tweets(keywords='LokSabhaElection2019', limit=10000)
for tweet in tweets:
    print(tweet['text'])
    try:
        f.write(tweet['text'])
        totaltweets += 1
    except Exception:
        pass
f.close()

f = open('E:/temp/twitter.txt', 'a')
oauth = credsfromfile()
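# The WordCloud, matplotlib, and VADER imports above are never used in this
# snippet. The sketch below shows one way they could be applied to the
# collected text; it reuses 'E:/temp/twitter.txt' from above, and the plotting
# and scoring choices are assumptions, not part of the original code.
# (VADER needs the lexicon: nltk.download('vader_lexicon').)
with open('E:/temp/twitter.txt') as fh:
    collected_text = fh.read()

# Word cloud of the collected tweet text.
wc = WordCloud(width=800, height=400).generate(collected_text)
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.show()

# VADER sentiment over the whole collection.
sia = SentimentIntensityAnalyzer()
print(sia.polarity_scores(collected_text))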
from nltk.twitter import Twitter

tw = Twitter()
tw.tweets(keywords='love, hate', limit=10)  # sample from the public stream
from nltk.twitter import Twitter

if __name__ == '__main__':
    tw = Twitter()
    tw.tweets(to_screen=False, limit=500, repeat=True)
# Test data for data_science project.
from nltk.twitter import Twitter

tw = Twitter()
tw.tweets(to_screen=False, limit=5000, lang=["en"])
# export TWITTER="twitter.txt"
from nltk.twitter import Twitter, Query, Streamer, credsfromfile
import pickle
import json
from pprint import pprint

__author__ = 'kongaloosh'

with open('data/investments.json') as data_file:
    # with open('data.json') as data_file:
    data = json.load(data_file)

oauth = credsfromfile()
tw = Twitter()
client = Query(**oauth)
for i in range(len(data['investments'])):
    if isinstance(data['investments'][i], dict):  # the original check was always true
        tweets = client.search_tweets(keywords=data['investments'][i]['name'],
                                      limit=100)
        data['investments'][i]['tweets'] = list(tweets)

# Pickle requires binary file modes.
with open('data_pickle.pkl', 'wb') as outfile:
    pickle.dump(data, outfile)

f = pickle.load(open('data_pickle.pkl', 'rb'))
print(f)
from nltk.twitter import Twitter

tw = Twitter()
results = tw.tweets(to_screen=False, keywords='angry, upset', limit=1000)
import re  # regular expressions
import json
import sys
from pprint import pprint
from nltk.twitter import Twitter, Query, credsfromfile

# Functions
# ------------------
def frequencyDistribution(data):
    return {i: data.count(i) for i in data}

# Twitter search
# ------------------
# Get 10 tweets mentioning the brand.
tw = Twitter()
tw.tweets(keywords='nationalgriduk', stream=False, limit=10)

brand = 'nationalgriduk'

# API keys
# ------------------------
oauth = credsfromfile()
client = Query(**oauth)
tweets = client.search_tweets(keywords=brand, limit=20000)
tweet = next(tweets)
pprint(tweet, depth=1)

# Make sure tweets can be printed: map characters outside the Basic
# Multilingual Plane to the replacement character.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# print(x.translate(non_bmp_map))
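# A minimal sketch of how the helpers above could be used on a few more
# results: applying translate() with non_bmp_map follows the commented-out
# hint, while passing the split tokens to frequencyDistribution is an
# assumption about its intended use.
from itertools import islice

for tweet in islice(tweets, 5):
    safe_text = tweet['text'].translate(non_bmp_map)
    print(safe_text)
    print(frequencyDistribution(safe_text.split()))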
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import tweepy
import json
from tweepy import Stream, OAuthHandler
from tweepy.streaming import StreamListener

Twitt = "D:/randomt/twitter.txt"
tw = Twitter()
tw.tweets(keywords='jayalalitha', stream=False, limit=10)
import twython
import nltk
from nltk.twitter import Twitter

tw = Twitter()
tw.tweets(keywords='love, hate', limit=10)  # sample from the public stream
# -*- coding: utf-8 -*-
# Section 2.5.4: Twitter data -- how to access Twitter data
from nltk.twitter import Twitter

tw = Twitter()
tw.tweets(keywords='happy', limit=10)
# ********************** READ BEFORE USE **********************
# NLTK uses a third-party library called Twython for handling Twitter; install
# it with 'pip install twython' before running this code.
# The environment variable TWITTER must also be set to the path containing the
# credentials text file. Copy the path of the directory holding
# credentials.txt, open your computer's environment variables, create a new
# user variable named TWITTER, and paste that directory as its value.
# When you specify a keyword and a number of tweets and run the code, a JSON
# file is created and its location is printed to the terminal. Move that file
# into the directory containing this code; its name is then used in
# classification.py in the two lines where each classification is called. You
# can see this in the code, as a JSON file is already specified -- just change it.
# The documentation for NLTK and Twitter can be found at
# http://www.nltk.org/howto/twitter.html

from nltk.twitter import Twitter, credsfromfile
from pprint import pprint

oauth = credsfromfile()
tw = Twitter()
tw.tweets(keywords='shooting', limit=10)  # prints to terminal
tw.tweets(keywords='shooting', to_screen=False, limit=10)  # writes to file
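# For reference, credsfromfile() expects a plain-text credentials file
# (credentials.txt by default) inside the directory named by TWITTER. The
# layout below follows the NLTK Twitter HOWTO; the placeholder values are
# obviously not real keys, so substitute your own.
#
# app_key=YOUR CONSUMER KEY
# app_secret=YOUR CONSUMER SECRET
# oauth_token=YOUR ACCESS TOKEN
# oauth_token_secret=YOUR ACCESS TOKEN SECRET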
# Test data for data_science project.
from nltk.twitter import Twitter

tw = Twitter()
tw.tweets(to_screen=False, limit=10000)
import nltk
from nltk.twitter import Twitter

# Credentials are read from the credentials file referenced by the TWITTER
# environment variable, so the API keys do not need to be hard-coded here.
tw = Twitter()
tw.tweets(keywords='cher, python, nlp, soccer, celine dion', limit=10)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from nltk.twitter import Twitter
import os

var = os.environ
os.environ["TWITTER"] = "C:/Users/admin/Documents/twitter-files"

tw = Twitter()
tw.tweets(keywords='algeria, algerie', limit=10)  # sample from the public stream

from nltk.corpus import twitter_samples

strings = twitter_samples.strings('tweets.20150430-223406.json')
for string in strings[:15]:
    print(string)
from nltk.twitter import Twitter

tw = Twitter()
tw.tweets(to_screen=False, limit=20000, lang=['en'])
import os
import nltk
import twython
from nltk.twitter import Twitter

os.environ["TWITTER"] = "/twitter-files"  # export TWITTER="/twitter-files"

tw = Twitter()
tw.tweets(keywords='love, hate', limit=10)
from nltk.twitter.common import json2csv
from nltk.twitter.common import json2csv_entities
from nltk.corpus import twitter_samples
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import pandas as pd

tw = Twitter()
tw.tweets(follow=['759251'], limit=10)  # see what CNN is talking about
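# The json2csv and pandas imports above are never used in this snippet. The
# sketch below shows one way they could be combined, using the bundled
# twitter_samples corpus as input; the output file name 'tweets_text.csv' and
# the chosen fields are arbitrary, not part of the original code.
input_file = twitter_samples.abspath('tweets.20150430-223406.json')
with open(input_file) as fp:
    # Keep only the id and text fields of each tweet.
    json2csv(fp, 'tweets_text.csv', ['id', 'text'])

df = pd.read_csv('tweets_text.csv', encoding='utf8')
print(df.head())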