Example #1
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.twitter import Twitter


def standardize():
    text = "This chapter is divided into sections that skip between two quite different styles. In the computing with language sections we will take on some linguistically motivated programming tasks without necessarily explaining how they work. In the closer look at Python sections we will systematically review key programming concepts. We'll flag the two styles in the section titles, but later chapters will mix both styles without being so up-front about it. We hope this style of introduction gives you an authentic taste of what will come later, while covering a range of elementary concepts in linguistics and computer science. If you have basic familiarity with both areas, you can skip to 5; we will repeat any important points in later chapters, and if you miss anything you can easily consult the online reference material at http://nltk.org/. If the material is completely new to you, this chapter will raise more questions than it answers, questions that are addressed in the rest of this book."
    # Remove English stopwords and use the remaining tokens as comma-separated
    # search keywords.
    clean_tokens = word_tokenize(text)[:]
    sr = stopwords.words('english')
    for token in word_tokenize(text):
        if token in sr:
            clean_tokens.remove(token)
    txt = ' , '.join(clean_tokens)
    print(txt)
    tw = Twitter()
    tw.tweets(keywords=txt, stream=False, limit=10)
Example #2
from nltk.twitter import Twitter, Streamer, TweetWriter, credsfromfile


def get_live_twitter_data():
    tw = Twitter()
    tw.tweets(keywords='flu, health, illness, hospital',
              stream=False,
              limit=5,
              to_screen=False)  # sample from the public stream


# Alternative: stream live tweets to file with the lower-level Streamer client.
# def get_live_twitter_data():
#     oauth = credsfromfile()
#     client = Streamer(**oauth)
#     client.register(TweetWriter(limit=20))
#     client.filter(track='have a fever, flu')
Example #3
import json

import numpy as np
from tqdm import tqdm
from nltk.twitter import Twitter, Query, credsfromfile


def scrape_twitter(google_client):
    tw = Twitter()
    # tweets = tw.tweets(keywords='JetBlue', stream=False, limit=10)  # sample from the public stream
    # print(tweets)
    oauth = credsfromfile()
    client = Query(**oauth)
    tweets = client.search_tweets(
        keywords='JetBlue OR #JetBlue -filter:retweets', limit=10000)

    topics_dict = {"tweet_texts": [],
                   "ent_score": [],
                   "ent_magn": [],
                   "overall_score": [],
                   "overall_magn": []}

    for tweet in tqdm(tweets):
        topics_dict["tweet_texts"].append(tweet['text'])
        ent_score, ent_magnitude, doc_score, doc_magnitude = analyze_text(
            google_client, text=tweet['text'])
        topics_dict["ent_score"].append(ent_score)
        topics_dict["ent_magn"].append(ent_magnitude)
        topics_dict["overall_score"].append(doc_score)
        topics_dict["overall_magn"].append(doc_magnitude)
        # pprint(tweet, depth=1)
        # print('\n\n')

    print('Total Count:', len(topics_dict["tweet_texts"]))
    metrics = ["ent_score", "ent_magn", "overall_score", "overall_magn"]
    for metric in metrics:
        metric_score = np.asarray(topics_dict[metric])
        print(metric, "Mean:", np.mean(metric_score), "St Dev:",
              np.std(metric_score))

    with open('./csvs/twitter-jetblue-sentiment.json', 'w') as fp:
        json.dump(topics_dict, fp)
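
# `analyze_text` is not defined in this example. As a stand-in for local
# testing, here is a hypothetical version built on NLTK's VADER analyzer
# (requires nltk.download('vader_lexicon')). It ignores `google_client` and
# returns four numbers shaped like the (entity score, entity magnitude,
# document score, document magnitude) tuple that scrape_twitter expects;
# VADER has no entity-level scores, so the document-level values are reused.
from nltk.sentiment.vader import SentimentIntensityAnalyzer


def analyze_text(google_client, text):
    sia = SentimentIntensityAnalyzer()
    scores = sia.polarity_scores(text)  # {'neg': ..., 'neu': ..., 'pos': ..., 'compound': ...}
    doc_score = scores['compound']                 # overall polarity in [-1, 1]
    doc_magnitude = scores['pos'] + scores['neg']  # rough strength of sentiment
    return doc_score, doc_magnitude, doc_score, doc_magnitude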
Example #4
def __init__(self, to_screen=True, follow=None, keywords=None, limit=10):
    self.follow = follow
    self.keywords = keywords
    tw = Twitter()
    tw.tweets(to_screen=to_screen,
              follow=follow,
              keywords=keywords,
              limit=limit)
Example #5
from nltk.twitter import Twitter

SPACER = '#' * 40  # visual separator; defined at module level in the original demo


def twitterclass_demo():
    """
    Use the simplified :class:`Twitter` class to write some tweets to a file.
    """
    tw = Twitter()
    print("Track from the public stream\n")
    tw.tweets(keywords='love, hate', limit=10) #public stream
    print(SPACER)
    print("Search past Tweets\n")
    tw = Twitter()
    tw.tweets(keywords='love, hate', stream=False, limit=10) # search past tweets
    print(SPACER)
    print("Follow two accounts in the public stream" +
          " -- be prepared to wait a few minutes\n")
    tw = Twitter()
    tw.tweets(follow=['759251', '6017542'], stream=True, limit=5) #public stream
Example #6
from nltk.twitter import Twitter

i = 0
dir = "/Users/oskarzhang/twitter-files/"
#sent_tokenizer = nltk.RegexpTokenizer("")
#while(i < 1000):	i += 1
tw = Twitter()
tw.tweets(limit=1000)  # sample 1000 tweets from the public stream
Example #7
import nltk
import pprint
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import seaborn as sns

# REST API
tw = Twitter()
# tw.tweets(keywords='LokSabhaElection2019', limit=2)
tw.tweets(keywords='LokSabhaElection2019', stream=False, limit=20)

## Read tweets
totaltweets = 0
oauth = credsfromfile()
client = Query(**oauth)
f = open('E:/temp/twitter.txt', 'w')
tweets = client.search_tweets(keywords='LokSabhaElection2019', limit=10000)
for tweet in tweets:
    print(tweet['text'])
    try:
        f.write(tweet['text'])
        totaltweets += 1
    except Exception:
        pass
f.close()

f = open('E:/temp/twitter.txt', 'a')
oauth = credsfromfile()
Example #8
from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(keywords='love, hate', limit=10)  # sample from the public stream
Example #9
from nltk.twitter import Twitter

if __name__ == '__main__':
    tw = Twitter()
    tw.tweets(to_screen=False, limit=500, repeat=True)
Example #10
#test data for data_science project.

from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(to_screen=False, limit=5000, lang=["en"])
# export TWITTER="twitter.txt"

from nltk.twitter import Twitter, Query, Streamer, credsfromfile
import pickle
import json
from pprint import pprint

__author__ = 'kongaloosh'

with open('data/investments.json') as data_file:
# with open('data.json') as data_file:
    oauth = credsfromfile()
    data = json.load(data_file)
    tw = Twitter()
    client = Query(**oauth)

    for i in range(len(data['investments'])):
        if isinstance(data['investments'][i], dict):
            tweets = client.search_tweets(keywords=data['investments'][i]['name'], limit=100)
            tweets = list(tweets)
            data['investments'][i]['tweets'] = tweets

    # pickle requires binary mode in Python 3
    with open('data_pickle.pkl', 'wb') as outfile:
        pickle.dump(data, outfile)

f = pickle.load(open('data_pickle.pkl', 'rb'))
print(f)
Example #12
from nltk.twitter import Twitter

tw = Twitter()
results = tw.tweets(to_screen=False, keywords='angry, upset', limit=1000)
Example #13
import re  # regular expressions
import json
from pprint import pprint
import sys

from nltk.twitter import Twitter, Query, credsfromfile


# Functions
#------------------
def frequencyDistribution(data):
    return {i: data.count(i) for i in data}


#LIVE twitter feed
#------------------
#get 10 twitter messages mentioning the brand (here 'nationalgriduk')
tw = Twitter()
tw.tweets(keywords='nationalgriduk', stream=False, limit=10)

brand = 'nationalgriduk'

#API keys
#------------------------
oauth = credsfromfile()
client = Query(**oauth)
tweets = client.search_tweets(keywords=brand, limit=20000)
tweet = next(tweets)
pprint(tweet, depth=1)

#make sure tweets can be encoded
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
#print(x.translate(non_bmp_map))
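
# The map above replaces characters outside the Basic Multilingual Plane
# (many emoji) with U+FFFD so tweet text can be printed on consoles that
# cannot encode them. A minimal usage sketch with the `tweet` fetched above:
print(tweet['text'].translate(non_bmp_map))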
Example #14
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
from nltk.twitter import Twitter
import tweepy
import json
from tweepy import Stream
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler

Twitt = "D:/randomt/twitter.txt"

tw = Twitter()
tw.tweets(keywords='jayalalitha', stream=False, limit=10)
Example #16
import twython
import nltk
from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(keywords='love, hate', limit=10) #sample from the public stream
Example #17
# -*- coding: utf-8 -*-
# Section 2.5.4  Twitter data: how to access Twitter data
from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(keywords='happy', limit=10)
Example #18
#********************** READ BEFORE USE *********************************************
# NLTK handles Twitter through a third-party library called Twython, which must be
# installed with 'pip install twython' before running this code.
# The environment variable TWITTER must also be set to the path of the directory
# containing the credentials text file: copy the directory that credentials.txt
# lives in, open your system's environment variable settings, create a new user
# variable named TWITTER, and paste that directory as its value.
# When you specify a keyword and a number of tweets and run the code, a JSON file
# is created and its location is printed in the terminal. Move that file to the
# directory containing this code; its name is then used in classification.py in
# the two lines where each classification is called (a JSON file is already
# specified in the code, so just change that).
# The NLTK Twitter documentation is at http://www.nltk.org/howto/twitter.html
from nltk.twitter import Twitter, credsfromfile
from pprint import pprint
oauth = credsfromfile()
tw = Twitter()
tw.tweets(keywords='shooting', limit=10)  # prints to terminal
tw.tweets(keywords='shooting', to_screen=False, limit=10)  # writes to file
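
# A minimal sketch of the setup described above (values are placeholders, not
# real credentials). Per the NLTK Twitter HOWTO, `credsfromfile` reads a
# credentials.txt file containing lines of the form:
#
#     app_key=YOUR_APP_KEY
#     app_secret=YOUR_APP_SECRET
#     oauth_token=YOUR_OAUTH_TOKEN
#     oauth_token_secret=YOUR_OAUTH_TOKEN_SECRET
#
# with the TWITTER environment variable pointing at the directory that holds
# it (e.g. export TWITTER="/path/to/twitter-files" on Unix shells).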
Example #19
#test data for data_science project.

from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(to_screen=False, limit=10000)
Example #20
import nltk
from nltk.twitter import Twitter

# Twitter API credentials should not be hard-coded here; supply them via the
# credentials file that the TWITTER environment variable points to.

tw = Twitter()

tw.tweets(keywords='cher, python, nlp, soccer, celine dion', limit=10)
Example #21
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from nltk.twitter import Twitter
import os


var = os.environ
os.environ["TWITTER"] = "C:/Users/admin/Documents/twitter-files"

tw = Twitter()
tw.tweets(keywords='algeria, algerie', limit=10)

# sample from the public stream

from nltk.corpus import twitter_samples
strings = twitter_samples.strings('tweets.20150430-223406.json')
for string in strings[:15]:
    print(string)
Example #22
from nltk.twitter import Twitter

SPACER = "#" * 40  # visual separator; defined at module level in the original demo


def twitterclass_demo():
    """
    Use the simplified :class:`Twitter` class to write some tweets to a file.
    """
    tw = Twitter()
    print("Track from the public stream\n")
    tw.tweets(keywords="love, hate", limit=10)  # public stream
    print(SPACER)
    print("Search past Tweets\n")
    tw = Twitter()
    tw.tweets(keywords="love, hate", stream=False,
              limit=10)  # search past tweets
    print(SPACER)
    print("Follow two accounts in the public stream" +
          " -- be prepared to wait a few minutes\n")
    tw = Twitter()
    tw.tweets(follow=["759251", "6017542"], stream=True,
              limit=5)  # public stream
Example #23
from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(to_screen=False, limit=20000, lang=['en'])
Example #24
import os

import nltk
import twython

from nltk.twitter import Twitter

# export TWITTER="/twitter-files"
os.environ['TWITTER'] = "/twitter-files"

tw = Twitter()
tw.tweets(keywords='love, hate', limit=10)
Example #25
from nltk.twitter.common import json2csv
from nltk.twitter.common import json2csv_entities
from nltk.corpus import twitter_samples
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import pandas as pd

tw = Twitter()
tw.tweets(follow=['759251'], limit=10)  # see what CNN is talking about
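
# The json2csv and pandas imports above are unused in this snippet. A minimal
# sketch of how they might fit together, assuming 'tweets.json' is a file of
# tweets previously collected with TweetWriter (the filename is a placeholder):
with open('tweets.json') as fp:
    json2csv(fp, 'tweets_text.csv', ['created_at', 'text'])  # keep two fields per tweet
df = pd.read_csv('tweets_text.csv')
print(df.head())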