import tweepy import os import networkx as nx import matplotlib.pyplot as plt # auth to twitter auth = tweepy.AppAuthHandler('xxxxxxxxxxxxx', 'xxxxxxxxxxx') api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) def getIndex(str, alist): for i in range(len(alist)): if alist[i] == str: return i return -99 fromuser = raw_input('Enter the username to start from: ') target = raw_input('Enter the target username: ') cursor = fromuser found = False G = nx.Graph() nameslist = [fromuser] while found == False: try: print cursor for follower in tweepy.Cursor(api.followers, screen_name=cursor).items(): if follower.screen_name not in nameslist: G.add_edge(cursor, follower.screen_name)
import codecs import tweepy import time import sys import csv import json from tweepy import OAuthHandler import nltk import re #required tokens for working with data from twitter using Tweepy access_token = "103589925-PYiNRi6sAoSAFCau7Q5zDAqF7Kt8WwsK5EunWL3I" access_token_secret = "u8N1nS93eN5npmtBOAxCwJgZE0W4wPCNe1CEuCB9lEIys" consumer_key = "IIFBxSZv8YnhRJuvvDJVkR4ht" consumer_secret = "zou3XOVMDXp9esingDNEowUeEPmTKkY4daZGYdmalovyd9JCxr" auth = tweepy.AppAuthHandler(consumer_key, consumer_secret) api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) #Authenticating with given tokens if (not api): print("Can't Authenticate") sys.exit(-1) def twitter_miner(query): tweetCount = 0 maxTweets = 200 neg = 0 pos = 0 cnt = 0 file1 = open('conf.txt', 'r', encoding="utf-8") file2 = open('ou.txt', 'w', encoding="utf-8")
import tweepy # Define ckey and csecret in keys.py with your application's key and secret. from keys import * import numpy as np import pandas as pd auth = tweepy.AppAuthHandler(ckey, csecret) api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) if (not api): print("Can't Authenticate") sys.exit(-1) import sys import os searchQuery = '#impostersyndrome OR #impostorsyndrome OR imposter syndrome OR impostor syndrome' # this is what we're searching for maxTweets = 10000000 # Some arbitrary large number tweetsPerQry = 100 # this is the max the API permits fName = 'tweets.csv' # We'll store the tweets in a text file. # If results from a specific ID onwards are reqd, set since_id to that ID. # else default to no lower limit, go as far back as API allows # sinceId = None ## commented out by abc ## abc: 10/16/18: new code for incremental search of tweets to be added ## to a possibly existing file tweets.csv import csv exists = os.path.isfile(fName) if exists:
from config import tw_key, tw_secret, aws_key, aws_secret import tweepy import numpy as np import boto3 from random import randint import time import json RUNTIME = 1.95 * 60 * 60 * 24 #1.95 days in seconds auth = auth = tweepy.AppAuthHandler(tw_key, tw_secret) api = tweepy.API(auth) has_data_file = "data/has_data" save_file = "data/data_store.json" twitterID = np.dtype([("id", np.uint32), ("followers", np.uint32), ("friends", np.uint32)]) filename = "data/users_data" file = open(filename, "r") users_data = np.fromfile(file, twitterID, count=-1) file.close() numUsers = len(users_data) try: file = open(has_data_file, "r") has_data = list(np.fromfile(file, np.uint32, count=-1)) file.close() except: has_data = [] #client = boto3.client('dynamodb', region_name='us-west-2')
client = MongoClient('localhost', 27017) fake = 'nameDB' real = 'nameDB' mailto="emailAdress" db = client[fake] RTthreshold=500 mongoThresholdPerDoc=300000 #(actual 600000, but just in case of crashing we have 500K) ACCESS_TOKEN = sys.argv[3] ACCESS_SECRET = sys.argv[4] CONSUMER_KEY = sys.argv[1] CONSUMER_SECRET = sys.argv[2] auth = tweepy.AppAuthHandler(CONSUMER_KEY, CONSUMER_SECRET) api = tweepy.API(auth, retry_count=10, retry_delay=5, retry_errors=set([103]), wait_on_rate_limit=True, wait_on_rate_limit_notify=True) def get_followers(user_id, tweetnumber, retweeter_num, total_retweeters): users = [] page_count = 0 try: for i, user in enumerate(tweepy.Cursor(api.followers_ids, id=user_id, count=5000).pages()):#gets 5000 users per page sys.stdout.write('\r') sys.stdout.write( 'Getting page %i for followers, for user: %i, for tweet number: %i, number of retweeters %i / %i' % ( i, user_id, tweetnumber, retweeter_num, total_retweeters)) sys.stdout.flush()
# main config file config = ConfigParser.ConfigParser() config.read(os.path.dirname(os.path.abspath( __file__)) + '/bbs-tweet-wall.cfg') # Twitter auth key (I like to keep seperate) auth = ConfigParser.ConfigParser() config.read(os.path.dirname(os.path.abspath( __file__)) + '/auth.cfg') # get Twitter auth values from config auth_key = config.get('auth', 'auth_key') auth_secret = config.get('auth', 'auth_secret') # setup Twtter oauth2 with values auth = tweepy.AppAuthHandler(auth_key, auth_secret) api = tweepy.API(auth) # Get Twitter list count from config file, make it an integer list_count = config.get('config', 'list_count') list_count_int = int(list_count) # Get format parameters from config tweet_max = int(config.get('config', 'tweet_max')) tweet_width = int(config.get('config', 'tweet_width')) tweet_lines = int(config.get('config', 'tweet_lines')) file_ext = config.get('config', 'file_ext') output_dir = config.get('config', 'output_dir') script_dir = config.get('config', 'script_dir') bgFileName = config.get('config', 'bgFileName') output_file = output_dir + '/' + 'tweets-all.' + file_ext
# Number of tweets per csv file NUM_TWEETS_PER_FILE = 250 # Number of data csv files needed MAX_NUM_DATA_FILES = 10000 / NUM_TWEETS_PER_FILE # Tokens and secrets for authentication for the Twitter API # These tokens were removed as they meant to be kept secret consumer_token = "" consumer_secret = "" access_token = "" access_token_secret = "" # Authenticates with the Twitter API auth = tweepy.AppAuthHandler(consumer_token, consumer_secret) # auth.set_access_token(access_token, access_token_secret) # Gets tweepy api wrapper # Sleep for 15 minutes because of the Twitter API rate limit # setting wait_on_rate_limit to True will make the program sleep automatically to avoid exceeding rate limits api = tweepy.API(auth, wait_on_rate_limit_notify=True, wait_on_rate_limit=True) # Stores hashes of all received tweets in a set to ensure there are no duplicates tweet_hash_set = set() # Stores the file number we are currently working with file_num = 0 # Stores number of tweets collected tweet_count = 0
from datetime import datetime from nltk.tokenize import word_tokenize import datetime as dt from shapely.geometry import Point, Polygon import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') nltk.download('punkt') nltk.download('stopwords') pd.set_option('display.max_columns', None) pd.set_option('display.max_colwidth', -1) app_key = '' app_key_secret = '' auth = tweepy.AppAuthHandler(app_key, app_key_secret) api = tweepy.API(auth) # Se leen los archivos. # In[2]: df = pd.DataFrame() path = 'data' filenames = os.listdir('data') print(f'Reading {len(filenames)} files ...') for i, filename in enumerate(filenames): df = df.append(pd.read_csv(path + '/' + filename), ignore_index=True, sort=True)
def connect_api(self):
    """Authenticate with Twitter (app-only auth) and attach a tweepy client.

    Stores the auth handler on ``self.auth`` and the API client on
    ``self.api``; prints a warning if the client could not be created.
    Uses ``self.consumer_key`` / ``self.consumer_secret`` for credentials.
    """
    handler = tweepy.AppAuthHandler(self.consumer_key, self.consumer_secret)
    self.auth = handler
    client = tweepy.API(handler,
                        wait_on_rate_limit=True,
                        wait_on_rate_limit_notify=True)
    self.api = client
    if not client:
        print("Problem Connecting to API")
# Extracting Data from Twitter # Pavan Narayanan, 2016 import sys import jsonpickle import tweepy auth = tweepy.AppAuthHandler("GET YOUR OWN AUTHETICATION", "SAME GOES HERE") api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) searchQuery = '"verizon"' maxTweets = 10000000 tweetsPerQry = 200 fName = 'tweets_' + searchQuery + '.txt' sinceId = None max_id = -1L tweetCount = 0 with open(fName, 'w') as f: while tweetCount < maxTweets: try: if max_id <= 0: if not sinceId: new_tweets = api.search(q=searchQuery, count=tweetsPerQry) else: new_tweets = api.search(q=searchQuery, count=tweetsPerQry, since_id=sinceId) else: if not sinceId:
def get_all_tweets(query1, query2):
    """Download recent tweets for two search queries and dump their text to CSV.

    Relies on module-level ``consumer_key`` / ``consumer_secret`` for
    app-only auth and on a module-level ``maxTweets`` cap per query.
    Writes one row per tweet (single column: tweet text) to
    ``../data/lalinbdg_tweets.csv``.
    """
    auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    alltweets = []

    def _collect(query):
        # Helper: page backwards through search results for one query.
        # (The original duplicated this whole loop for query1 and query2.)
        new_tweets = api.search(q=query, count=100)
        alltweets.extend(new_tweets)
        if not new_tweets:
            # Fix: the original indexed alltweets[-1] unconditionally and
            # raised IndexError when a query matched no tweets at all.
            return
        oldest = alltweets[-1].id - 1
        tweet_count = len(new_tweets)
        # All subsequent requests use max_id to prevent duplicates.
        while new_tweets and tweet_count < maxTweets:
            new_tweets = api.search(q=query, count=50, max_id=oldest)
            alltweets.extend(new_tweets)
            if new_tweets:
                oldest = alltweets[-1].id - 1
            print("...%s tweets downloaded so far" % (len(alltweets)))
            tweet_count += len(new_tweets)

    _collect(query1)
    _collect(query2)

    # Transform the tweepy tweets into a 2D array that populates the csv.
    outtweets = [[tweet.text] for tweet in alltweets]
    with open('../data/lalinbdg_tweets.csv', 'w') as f:
        writer = csv.writer(f)
        writer.writerows(outtweets)
def main(): ''' :param argv: argv[1]: which api will be chosen, streaming/search. argv[2]: which city for this task, Melbourne or Sydney :return: ''' # Initialization... api_type = sys.argv[1] city = sys.argv[2] if city == 'Melbourne': geo_circle_info = GEO_CIRCLE_INFO_MELB which_file_path = './tweet_id_melbourne.txt' db_collection = 'tweets_melbourne' elif city == 'Sydney': geo_circle_info = GEO_CIRCLE_INFO_SYDNEY which_file_path = './tweet_id_sydney.txt' db_collection = 'tweets_sydney' else: print( 'You should choose either \'Melbourne\' or \'Sydney\' as the second para' ) return 0 # This handles Twitter authetification and the connection to Twitter Streaming API l = StdOutListener() auth1 = OAuthHandler(consumer_key, consumer_secret) auth1.set_access_token(access_token, access_token_secret) auth = tweepy.AppAuthHandler(API_KEY, API_SECRET) api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) tweet_count = 0 screen_names = set() if api_type == 'search': coll = get_db(db_collection) max_id = get_max_id() if not max_id: print "This is your first time to invoke this script ^_^" else: print "The task will start from tweet NO." 
+ str(max_id) with open(which_file_path, 'a') as f: time_stamp = time.strftime('%Y-%m-%d-%H-%M', time.localtime(time.time())) f.write(str(time_stamp) + '\n') while tweet_count < MAX_TWEETS: try: if (max_id <= 0): new_tweets = api.search(q="*", geocode=geo_circle_info, count=TWEETS_PER_QRY) else: new_tweets = api.search(q="*", geocode=geo_circle_info, count=TWEETS_PER_QRY, max_id=str(max_id - 1)) if not new_tweets: print("No more tweets found") oldest_id = new_tweets[-1].id f.write(str(oldest_id) + '\n') global count for tweet in new_tweets: json_str = json.dumps(tweet._json) json_o = json.loads(json_str) coll.insert(json_o) new_screen_name = json_o['user']['screen_name'] if new_screen_name not in screen_names: screen_names.add(new_screen_name) get_all_tweets_by_screen_name( new_screen_name, api, db_collection) count += 1 tweet_count += len(new_tweets) print("Download {0} tweets".format(tweet_count)) except tweepy.TweepError as e: print("ERROR FOUND: " + str(e)) f.close() elif api_type == 'streaming': while True: try: stream = Stream(auth1, l) stream.filter(locations=GEOBOX_MELBOURNE) except Exception as e: print e continue elif api_type == 'username': last_name = get_last_name() if last_name == -1: print "No name history has been found, start from the first name ^_^" else: print "This task will start from name: [%s]" % last_name f_n_h = open(file_path_names_history, 'a') flag = False with open(file_path_names, 'r') as f: for screen_name in f.readlines(): try: if last_name == -1: f_n_h.write(screen_name.strip() + '\n') get_all_tweets_by_screen_name(screen_name.strip(), api) elif not flag and last_name != -1: if screen_name.strip() != last_name.strip(): pass else: flag = True # f_n_h.write(screen_name.strip() + '\n') # get_all_tweets_by_screen_name(screen_name.strip(), api) else: f_n_h.write(screen_name.strip() + '\n') get_all_tweets_by_screen_name(screen_name.strip(), api) except Exception as e: print e continue else: print 'For the 1st para, neither \"streaming\" 
nor \"search\" has been correctly input'
import tweepy import sys import jsonpickle import os auth = tweepy.AppAuthHandler( "6cxqy2zGDG79IT0kzwl1VTZRt", "B11tACBDKtcpQ4jh9gRqmueIMxVqmkpwCYnYMEVJerqSR6eAyH") api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) if (not api): print("Can't Authenticate") sys.exit(-1) def get_tweet(searchQuery, maxTweets=200): tweetsPerQry = 100 # Some arbitrary large number # If results from a specific ID onwards are reqd, set since_id to that ID. # else default to no lower limit, go as far back as API allows sinceId = None # If results only below a specific ID are, set max_id to that ID. # else default to no upper limit, start from the most recent tweet matching the search query. max_id = -1 c = 0 tweetCount = 0 tweets = [] tts = []
def calculatetweetsdemo(input_obtained, input_month): tweet_dict = {} name_not_entered = False print("called") if input_obtained == '': name_not_entered = True positive_count = 0 negative_count = 0 neutral_count = 0 consumer_key = 'pBQ6uagJoN3eksDl55bzaSepf' consumer_secret = 'NEA8UFjkf7325FhKWba02kgQJWSKmQLhCrXzWYyyyaQEXICNic' auth = tweepy.AppAuthHandler(consumer_key, consumer_secret) api = tweepy.API(auth, wait_on_rate_limit=True) tweet_list = [] final_month = 0 final_year = 2019 def calculate_sentiment(tweet): test_tweet = TextBlob(cleantweet(tweet)) if test_tweet.sentiment.polarity > 0: return 'positive' elif test_tweet.sentiment.polarity == 0: return 'neutral' else: return 'negative' def cleantweet(tweet): return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) | (\w+:\ / \ / \S+)", " ", tweet).split()) def gettweets(final_month, final_year): nonlocal months_not_entered nonlocal error x = re.findall("\D", str(input_month)) if x: error = True return if input_month == "": months_not_entered = True return entered_month = input_month nonlocal notweets print(entered_month) if entered_month == 0: notweets = True if entered_month > 24: error = True print(error) return todays_date = date.today() print(todays_date) todays_date = str(todays_date).split("-") current_month = int(todays_date[1]) if entered_month < current_month: final_month = current_month - entered_month elif 4 - entered_month <= 0: quotient = int(entered_month / current_month) final_year = final_year - quotient entered_month = entered_month % current_month final_month = 12 - entered_month print(str(final_year) + " " + str(final_month)) todays_day = todays_date[2] print(todays_day) nonlocal positive_count, neutral_count, negative_count repeated_count = 0 tweet_counter = 0 rootDir = os.path.dirname(os.path.abspath(__file__)) fh = open(rootDir + "/" + input_obtained + str(months) + "tweets" + ".txt", "w", encoding="utf-8") for tweet in tweepy.Cursor(api.search, q="#" + input_obtained, lang="en", count = 
200, since=str(final_year) + "-" + str(final_month) + "-"+todays_day).items(): print("tweet " + str(tweet_counter) + " processed") sentiment_type = calculate_sentiment(tweet.text) tweets_no_repeat = {'text': tweet.text, 'sentiment': sentiment_type} if tweet.retweet_count > 0: if tweets_no_repeat not in tweet_list: tweet_list.append(tweets_no_repeat) fh.write(tweet.text.replace("\n","")+"--++=="+sentiment_type+"\n") if sentiment_type == 'positive': positive_count = positive_count + 1 elif sentiment_type == 'negative': negative_count = negative_count + 1 else: neutral_count = neutral_count + 1 print(tweet.text) else: if sentiment_type == 'positive': positive_count = positive_count + 1 elif sentiment_type == 'negative': negative_count = negative_count + 1 else: neutral_count = neutral_count + 1 repeated_count += 1 elif tweet.retweet_count == 0: tweet_list.append(tweets_no_repeat) fh.write(tweet.text.replace("\n", "") + "--++==" + sentiment_type+"\n") if sentiment_type == 'positive': positive_count = positive_count + 1 elif sentiment_type == 'negative': negative_count = negative_count + 1 else: neutral_count = neutral_count + 1 print(tweet.text) tweet_counter += 1 fh.close() total_tweets = positive_count + neutral_count + negative_count if total_tweets == 0: notweets = True print(str(positive_count) + " " + str(negative_count) + " " + str(neutral_count)) print(" " + str(total_tweets)) print("repeated ", repeated_count) print(len(tweet_list)) nonlocal tweet_dict tweet_dict = {'Tweets': tweet_list} df = pandas.DataFrame.from_dict(tweet_dict) rootDir = os.path.dirname(os.path.abspath(__file__)) df.to_csv(rootDir + "/" + input_obtained + str(months) + "tweets" + ".csv", index=False) error = False notweets = False months_not_entered = False gettweets(final_month, final_year) print(error) if error: return "invalid input" if notweets: return "no tweets" if months_not_entered and name_not_entered: return "name and months empty" if months_not_entered: return "months empty" if 
name_not_entered: return "name empty" if not error and not notweets: total_count = positive_count + neutral_count + negative_count if total_count == 0: return "total count zero" return str(positive_count)+","+str(negative_count)+","+str(neutral_count)
def get_tweets(request): if request.POST: import tweepy, sys, jsonpickle consumer_key = 'K6V0CCCWRTrmk582ETAepQ77q' consumer_secret = 'T9q1U8hO6AKG8znSbsvjR7gu6eCGvxR6d1S4KJjCrPI8vvzndz' qry = '@Telkomsel AND (pulsa OR sinyal OR harga OR kualitas OR kuota OR internet OR jaringan OR pelayanan)' maxTweets = 1000 # Isi sembarang nilai sesuai kebutuhan anda tweetsPerQry = 100 # Jangan isi lebih dari 100, ndak boleh oleh Twitter t = datetime.now() formatted_time = t.strftime('%d-%m-%y %H.%M') fname = 'Tweets_' + formatted_time auth = tweepy.AppAuthHandler(consumer_key, consumer_secret) api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) if (not api): sys.exit( 'Autentikasi gagal, mohon cek "Consumer Key" & "Consumer Secret" Twitter anda' ) sinceId = None max_id = -1 tweetCount = 0 with open(fname + '.json', 'w') as f: while tweetCount < maxTweets: try: if (max_id <= 0): if (not sinceId): new_tweets = api.search(q=qry, count=tweetsPerQry, tweet_mode='extended') else: new_tweets = api.search(q=qry, count=tweetsPerQry, since_id=sinceId, tweet_mode='extended') else: if (not sinceId): new_tweets = api.search(q=qry, count=tweetsPerQry, max_id=str(max_id - 1), tweet_mode='extended') else: new_tweets = api.search(q=qry, count=tweetsPerQry, max_id=str(max_id - 1), since_id=sinceId, tweet_mode='extended') if not new_tweets: print( 'Tidak ada lagi Tweet ditemukan dengan Query="{0}"' .format(qry)) break for tweet in new_tweets: if (tweet._json['user']["name"] != "Telkomsel" and "?" not in tweet._json["full_text"] and tweet._json['metadata']["iso_language_code"] == "in"): f.write( jsonpickle.encode(tweet._json, unpicklable=False) + '\n') # text = tweet._json["full_text"] # text = re.sub(r"(?:\|https?| https?|https? 
\://)\S+", "", text) # character = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', # 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] # for i in range(len(character)): # charac_long = 5 # while charac_long >= 2: # char = character[i] * charac_long # text = text.replace(char, character[i]) # charac_long -= 1 # text = ' '.join(word.strip(string.punctuation) for word in text.split()) tweetCount += len(new_tweets) max_id = new_tweets[-1].id except tweepy.TweepError as e: print("some error : " + str(e)) break """messages.add_message(request, messages.INFO, 'Tweets telah tersimpan pada filename: {1}'.format(tweetCount, fname)) messages.add_message(request, messages.INFO, 'Jumlah Tweets telah tersimpan: %.0f' % tweetCount)""" fo = open(fname + '.json', 'r') fw = open(fname + '.txt', 'w') for line in fo: try: tweet = json.loads(line) text = ' '.join( word.strip(string.punctuation) for word in tweet['full_text'].split()) text = re.sub(r"(?:\@ | @|@|https?| https?|https? \://)\S+", "", text) character = [ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '.' 
] for i in range(len(character)): charac_long = 5 while charac_long >= 2: char = character[i] * charac_long text = text.replace(char, character[i]) charac_long -= 1 #text = ' '.join(word.strip(string.punctuation) for word in text.split()) text = re.sub(r"\n", "", text) fw.write(text + "\n") except: continue import nltk testing_data = '/Users/achmed/Documents/data/testing.txt' pos_words = '/Users/achmed/PycharmProjects/positive.txt' neg_words = '/Users/achmed/PycharmProjects/negative.txt' def read_opinion_lexicon(fileName): dataFile = open(fileName, "r") word_set = set() for line in dataFile: line = line.strip() if line not in word_set: word_set.add(line) dataFile.close() return word_set positive_words = read_opinion_lexicon(pos_words) negative_words = read_opinion_lexicon(neg_words) fo = open(fname + '.txt', 'r') Result.objects.all().delete() global sentpos_count, sentneg_count, sentnet_count sentpos_count = 0 sentneg_count = 0 sentnet_count = 0 negation_words = [ 'tidak', 'bukan', 'gak', 'enggak', 'belum', 'ga', 'tdk', 'tak', 'malah' ] for line in fo: sentence = line kalimat = sentence.split() doc_words = [w.lower() for w in kalimat] pos_count = 0 neg_count = 0 for w in doc_words: if w in positive_words: if (doc_words[doc_words.index(w) - 1] in negation_words): neg_count -= 1 else: pos_count += 1 if w in negative_words: if (doc_words[doc_words.index(w) - 1] in negation_words): pos_count += 1 else: neg_count -= 1 sum = pos_count + neg_count if (sum == 0): classify_result = "netral" sentnet_count += 1 pos_score = 0.5 neg_score = 0.5 else: pos_score = pos_count / sum neg_score = neg_count / sum if (pos_score > neg_score): classify_result = "positif" sentpos_count += 1 else: classify_result = "negatif" sentneg_count += 1 sentiment2 = Result(sentiment=sentence, classification=classify_result) sentiment2.save() #classifier.show_most_informative_features() #test_corpus, _ = load_corpus(testing_data) #test_set_features = [(doc_features(d), c) for (d, c) in test_corpus] 
#print(nltk.classify.accuracy(classifier, test_set_features)) return render(request, 'sentiment_analysis.html', {'obj': Result.objects.all()})
def __init__(self) -> None:
    """Read Twitter credentials from the environment and build an API client.

    Looks up TWITTER_API_KEY / TWITTER_API_SECRET / TWITTER_API_BEARER
    (any may be absent, yielding None) and creates an app-only tweepy
    client from the key/secret pair.
    """
    env = os.environ.get
    self.api_key = env('TWITTER_API_KEY')
    self.api_secret = env('TWITTER_API_SECRET')
    # Bearer token is stored but not used below — presumably for
    # bearer-token requests elsewhere; confirm against callers.
    self.api_bearer = env('TWITTER_API_BEARER')
    self.auth = tweepy.AppAuthHandler(self.api_key, self.api_secret)
    self.api = tweepy.API(self.auth)
# Twitter authentication part # Replace the API_KEY and API_SECRET with your application's key and secret. from http://apps.twitter.com # put your API_KEY and API_SECRET in twitappkeys.dat file which you will place in dirapikeys directory # specified one line bellow in first and second line dirapikeys = '/data/twitter/' with open(dirapikeys + 'twitappkeys.txt') as fkeys: lfkeys = fkeys.readlines() API_KEY = lfkeys[0].strip() API_SECRET = lfkeys[1].strip() maxTweets = 10000 # You can set this number to what ever you like value, # but keep in mind that twitter keeps records for searches publicly available via API to up to last 7 days tweetsPerQry = 100 # do not change this number auth = tweepy.AppAuthHandler(API_KEY, API_SECRET) api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) if (not api): print ("Can't Authenticate") logging.debug("Can't Authenticate") sys.exit(-1) # Advice: don't change sinceId and max_id values # If results from a specific ID onwards set since_id to that ID. # else default to no lower limit sinceId = None, go as far back as API allows sinceId = None # If results only below a specific ID are, set max_id to that ID. # else default to no upper limit max_id = -1, start from the most recent tweet matching the search query. max_id = -1
def __init__(self, key, secret):
    """Create an app-only tweepy API client from the given key/secret pair."""
    handler = tweepy.AppAuthHandler(key, secret)
    self.auth = handler
    self.api = tweepy.API(handler)
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Sat Mar 20 20:39:36 2021 @author: Ajit Johnson Nirmal Twitter Post Finder """ # Lib import tweepy import datetime import pandas as pd # Authentication auth = tweepy.AppAuthHandler( 'VhVYh9RVGivvFanwDr7kg8qBt', 'oXG5rJDopPVgThiq6xEU3fKNonpkDTFMvazQjSvgkw66NeuVsf') # test api = tweepy.API(auth) # Run Function pages = [ 'NewsfromScience', 'ScienceNews', 'newscientist', 'Immunoscope', 'nature', 'NatureMedicine', 'nresearchnews', 'natBME', 'NatureBiotech', 'NatureComms', 'NatImmunol', 'NatureNews', 'naturemethods', 'NatureHumBehav', 'NatureAstronomy', 'NatureGenet', 'NatureNV', 'NatureClimate', 'NatureCellBio', 'CellCellPress', 'TrendsCellBio', 'Dev_Cell', 'CellSystemsCP', 'CellPressNews', 'TrendsMolecMed', 'TrendsImmuno', 'MolecularCell', 'CellStemCell', 'NeuroCellPress', 'Cancer_Cell', 'cellhostmicrobe', 'JCellBiol', 'NewsfromScience',
import csv import itertools import tweepy import os from dotenv import load_dotenv # Load environement variables containing Twitter API keys load_dotenv() # Authorize Tweepy API auth = tweepy.AppAuthHandler(os.getenv('CONSUMER_KEY'), os.getenv('CONSUMER_SECRET_KEY')) api = tweepy.API(auth) # Generate the search query based on all combinations of these phrases names = ['Adia Barnes', "Arizona coach"] modifiers = ['mother', 'breastfeeding', 'pumping', 'nursing', 'normalize'] query = ' OR '.join( map(lambda x: x[0] + ' ' + x[1], itertools.product(names, modifiers))) # Open a file for writing the tweets to with open('tweets.csv', 'w', newline='') as csvfile: writer = csv.writer(csvfile, delimiter=',') writer.writerow( ['text', 'id', 'created', 'is_retweet', 'favorites', 'retweets']) for tweet in tweepy.Cursor(api.search, q=query, lang='en-us').items(): writer.writerow([ tweet.text.encode(encoding='UTF-8', errors='strict'), tweet.id, tweet.created_at, tweet.text.startswith('RT @'), tweet.favorite_count,
def __init__(self): self.nlp = spacy.load('en_core_web_lg') self.contraction_mapping = { "ain't": "is not", "aren't": "are not", "can't": "cannot", "can't've": "cannot have", "'cause": "because", "could've": "could have", "couldn't": "could not", "couldn't've": "could not have", "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hadn't've": "had not have", "hasn't": "has not", "haven't": "have not", "he'd": "he would", "he'd've": "he would have", "he'll": "he will", "he'll've": "he will have", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is", "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have", "I'm": "I am", "I've": "I have", "i'd": "i would", "i'd've": "i would have", "i'll": "i will", "i'll've": "i will have", "i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would", "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have", "it's": "it is", "let's": "let us", "ma'am": "madam", "mayn't": "may not", "might've": "might have", "mightn't": "might not", "mightn't've": "might not have", "must've": "must have", "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have", "o'clock": "of the clock", "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have", "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is", "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have", "so's": "so as", "this's": "this is", "that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would", "there'd've": "there would have", "there's": "there is", "here's": "here is", "they'd": "they would", "they'd've": "they would have", "they'll": "they will", "they'll've": "they 
will have", "they're": "they are", "they've": "they have", "to've": "to have", "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are", "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are", "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is", "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have", "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have", "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all", "y'all'd": "you all would", "y'all'd've": "you all would have", "y'all're": "you all are", "y'all've": "you all have", "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have", "you're": "you are", "you've": "you have" } self.keys = get_config_from_json('..//Keys//keys.json') self.auth = tw.AppAuthHandler(self.keys.twitter_keys.consumer_key, self.keys.twitter_keys.consumer_secret) self.api = tw.API(self.auth, wait_on_rate_limit=True) limit = self.api.rate_limit_status() limit = DotMap(limit) print(limit.resources.search)
def standard_search_crawler(max_tweets, area, tweet_file, user_list):
    """Crawl up to ``max_tweets`` geo-locatable English tweets for a fixed
    Twitter place and append them as JSON lines.

    Parameters
    ----------
    max_tweets : int
        Stop once this many tweets have been saved.
    area
        Area description forwarded to ``allocate_tweet`` (project helper).
    tweet_file : str
        Path of the JSON-lines file tweet records are appended to.
    user_list : str
        Path of the JSON-lines file author ids are appended to.

    Returns
    -------
    str
        Human-readable download notice.
    """
    auth = tweepy.AppAuthHandler(
        "ujqGAvWCv9C14893iDGd8aUfb",
        "7VQIxQ8cQJ0mQQwre9nRG0uJ2LEBRlHmOcPUpjHPHO9DmsoDTE")
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    if not api:
        # Fixed: the original printed the message but carried on and would
        # have crashed on the first search call.
        print("Can't Authenticate")
        return "Can't Authenticate"

    search_query = 'place:0ec0c4fcacbd0083'
    tweets_per_qry = 100  # this is the max the API permits
    tweet_count = 0
    max_id = None  # id of the oldest tweet fetched so far; drives pagination
    print("Downloading max {0} tweets".format(max_tweets))
    # The `with` statements already guarantee the files are closed; the
    # original's redundant explicit close() calls have been removed.
    with open(tweet_file, 'a', encoding='utf-8') as load_f, \
            open(user_list, 'a', encoding='utf-8') as load_f1:
        while tweet_count < max_tweets:
            # Fixed: paginate with max_id so each query returns an older
            # page instead of refetching the newest tweets forever.
            if max_id is None:
                new_tweets = api.search(q=search_query, count=tweets_per_qry)
            else:
                new_tweets = api.search(q=search_query,
                                        count=tweets_per_qry,
                                        max_id=str(max_id - 1))
            if not new_tweets:
                print("No more tweets found")
                break
            for tweet_status in new_tweets:
                tweet = tweet_status._json
                text = None
                if tweet.get('text') and tweet['lang'] == 'en':
                    text = tweet['text']
                x, y = _tweet_coordinates(tweet)
                if x is None or y is None or text is None:
                    continue
                allocated = allocate_tweet(x, y, area)
                if allocated is None:
                    continue
                twi_dict = {
                    "_id": tweet.get('id'),
                    "x_coord": x,
                    "y_coord": y,
                    "text": text,
                    "area": allocated,
                    'hashtags': tweet['entities']['hashtags'],
                    'time': tweet['created_at'],
                }
                load_f.write(json.dumps(twi_dict) + "\n")
                tweet_count += 1
                line = {'id': tweet['user']['id_str']}
                load_f1.write(json.dumps(line) + '\n')
            max_id = new_tweets[-1].id
    notice = "Downloaded {0} tweets, Saved to {1}".format(tweet_count, 'disk')
    return notice


def _tweet_coordinates(tweet):
    """Best-effort (lon, lat) for a tweet JSON dict, or (None, None).

    Precedence: exact 'coordinates' field first, then the legacy 'geo'
    field (which stores [lat, lon], hence the swap), then the centre of
    the place bounding box as a coarse fallback.
    """
    if tweet.get('coordinates'):
        coords = tweet['coordinates']['coordinates']
        return coords[0], coords[1]
    if tweet.get('geo'):
        coords = tweet['geo']['coordinates']
        return coords[1], coords[0]
    if tweet.get('place'):
        # Fixed: the original let this coarse bounding-box centre override
        # exact coordinates when both were present; now it is only a fallback.
        centre = get_coord_by_box(
            tweet['place']['bounding_box']['coordinates'][0])
        return centre[0], centre[1]
    return None, None
# movienames='#moana OR #doctorstrange OR #allied OR #arrivalmovie OR #badsanta2 OR #almostchristmasmovie OR #assassinscreed OR #collateralbeauty OR #fantasticbeastsandwheretofindthem OR #jackie OR #lalaland OR #passengers OR #rogueonestarwarsstory OR #sing' # tagsToSearch = '#CambMA' print(tagsToSearch) #----------------------------------------------------------------------- # load API credentials #----------------------------------------------------------------------- config = {} # execfile("config.py", config) exec(compile(open("config.py", "r").read(), "config.py", 'exec'), config) #----------------------------------------------------------------------- # create twitter API object #----------------------------------------------------------------------- # Setup tweepy to authenticate with Twitter credentials: auth = tweepy.AppAuthHandler(config["consumer_key"], config["consumer_secret"]) # auth.set_access_token(config["access_token"], config["access_token_secret"]) # Create the api to connect to twitter with your creadentials api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, compression=True) # Create a producer to write json messages to kafka #producer = KafkaProducer(bootstrap_servers=['kafka:9092'], # default is localhost:9092 # value_serializer=lambda v: json.dumps(v).encode('utf-8')) try: producer = KafkaProducer( bootstrap_servers=[ "" + os.environ['HOSTNAME'] + ":" + os.environ['PORT'] + ""
def tweet_to_db(searchQuery, start, end, tweetsPerQry=100, maxTweets=100000000):
    """
    This function pulls tweets with hashtag searchQuery from a specified
    time period [start, end].

    tweetsPerQry = 100 is the maximal number of tweets allowed by Twitter
    per query. maxTweets is some arbitrary big number.

    Tweets are instantly inserted into an SQL database with one table
    'tweets', which has the following columns:
        id       - unique identifier
        tweet_id - BIGINT, unique tweet id
        hashtag  - TEXT, the search query the tweet matched
        datetime - TEXT, datetime in format '%Y-%m-%d %H:%M:%S'
        content  - TEXT, tweet content (ASCII, non-ASCII replaced by '?')
    """
    # Obtain keys (one per line: consumer key, consumer secret, token).
    with open('/home/ubuntu/twitter_oauth.txt') as oauth:
        keys = oauth.readlines()
    consumer_key, consumer_secret, access_token = [x.strip() for x in keys]

    # App-only auth; access_token is unused by AppAuthHandler.
    auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    startSince = start.strftime("%Y-%m-%d")
    endUntil = (end + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    end_tweet = api.search(q=searchQuery, count=1, until=endUntil)[0]
    start_tweet = api.search(q=searchQuery, count=1, until=startSince)[0]
    tweetCount = 0

    # Identify ending id of tweets within timeframe using a binary search
    # on tweet ids; this is not available directly through the API.
    while end_tweet.created_at - end > datetime.timedelta(0, 5, 0):
        mid_id = int((end_tweet.id + start_tweet.id) / 2)
        # Grab 10 tweets just to make sure they are not all zeros.
        mid_tweet = api.search(q=searchQuery, count=10, max_id=mid_id)[0]
        if end - mid_tweet.created_at > datetime.timedelta(0, 5, 0):
            start_tweet = mid_tweet
        else:
            end_tweet = mid_tweet
    max_id = end_tweet.id

    # Connect to the RDS PostgreSQL instance (credentials one per line).
    with open('/home/ubuntu/rds_keys.txt') as rds_keys:
        keys = rds_keys.readlines()
    host, dbname, rds_user, rds_pw = [x.strip() for x in keys]
    con = psycopg2.connect(host=host, dbname=dbname, user=rds_user,
                           password=rds_pw, port='5432')
    try:
        cur = con.cursor()
        create_table = """ CREATE TABLE IF NOT EXISTS tweets (id SERIAL PRIMARY KEY, tweet_id BIGINT, hashtag TEXT, datetime TEXT, content TEXT); """
        cur.execute(create_table)
        con.commit()

        # Ids already stored for this hashtag, to avoid duplicate rows.
        # (A set comprehension over fetchall() is empty when rowcount is 0,
        # so the original rowcount special-case is unnecessary.)
        cur.execute("SELECT tweet_id FROM tweets WHERE hashtag = %s",
                    [searchQuery])
        unique_ids = {row[0] for row in cur.fetchall()}

        insert_tweet = """ INSERT INTO tweets(tweet_id, hashtag, datetime, content) VALUES (%s, %s, %s, %s) """

        while tweetCount < maxTweets:
            try:
                new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                        lang='en', max_id=str(max_id - 1),
                                        since=startSince, until=endUntil)
                if not new_tweets:
                    print("No more tweets found")
                    break
                if new_tweets[-1].created_at < start:
                    print("Exhausted time interval.")
                    break
                for tweet in new_tweets:
                    if tweet.id not in unique_ids:
                        unique_ids.add(tweet.id)
                        tweet_datetime_str = tweet.created_at.strftime(
                            '%Y-%m-%d %H:%M:%S')
                        # Fixed: `unicode(...)` is Python-2 only and raised
                        # NameError here; sanitise to ASCII instead.
                        content = tweet.text.encode(
                            'ascii', 'replace').decode('ascii')
                        cur.execute(insert_tweet,
                                    [tweet.id, searchQuery,
                                     tweet_datetime_str, content])
                        con.commit()
                tweetCount += len(new_tweets)
                max_id = new_tweets[-1].id
            except tweepy.TweepError:
                # Assume a transient/rate-limit error: back off and retry.
                time.sleep(180)
                continue
    finally:
        # Fixed: the original leaked the database connection.
        con.close()
    # Fixed: the original passed two arguments to print(), emitting
    # "Total number of tweets: %s <count>" instead of formatting.
    print("Total number of tweets: %s" % tweetCount)
messages.append(message) with open('data.txt', 'a') as file: for m in messages: pickle.dump(m, file, pickle.HIGHEST_PROTOCOL) tweetCount += len(new_tweets) print("Downloaded {0} tweets".format(tweetCount)) max_id = new_tweets[-1].id except tweepy.TweepError as e: # Just exit if any error print("some error : " + str(e)) break print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName)) # Replace the API_KEY and API_SECRET with your application's key and secret. auth = tweepy.AppAuthHandler(config[0], config[1]) api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) if (not api): print("Can't Authenticate") sys.exit(-1) # Continue with rest of code # Fetch search terms from `Search.txt` with open('Search.txt', 'r') as file: search_list = file.readlines() search_list = [x.strip() for x in search_list] for term in search_list:
import urllib import json import tweepy from flask import Flask, jsonify from tweepy.parsers import JSONParser from multiprocessing import Process, Queue app = Flask(__name__) TWITTER_API_KEY = "Replace this string with your twitter api key" TWITTER_API_SECRET = "Replace this string with your twitter api secret key" GOOGLE_API_KEY = "Replace this string with your google api key" GOOGLE_API_CX = "Replace this string with your google api cx" auth = tweepy.AppAuthHandler(TWITTER_API_KEY, TWITTER_API_SECRET) api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, parser=tweepy.parsers.JSONParser() ) if (not api): print ("Can't Authenticate Twitter") sys.exit (-1) #For storing the results of the three following parallel functions result_queue = Queue() #DuckDuckGo instant API
from sklearn.externals import joblib import tweepy from threading import Thread from flask_sqlalchemy import SQLAlchemy from models import * app = Flask(__name__) app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///gbrannotation.sqlite3' app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False app.secret_key = os.urandom(24) db = SQLAlchemy(app) thread = None clf = joblib.load(os.path.join(config.APP_STATIC, 'gbr_multi_label.pkl')) mlb = joblib.load(os.path.join(config.APP_STATIC, 'mlb.pkl')) auth = tweepy.AppAuthHandler(config.CONSUMER_KEY, config.CONSUMER_SECRET) api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) SEARCH_TERM = ['Great Barrier Reef', 'GBR', 'greatbarrierreef'] auth = tweepy.AppAuthHandler(config.CONSUMER_KEY, config.CONSUMER_SECRET) api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) def predict(sentence): predicted = clf.predict(sentence) inverse_pred = mlb.inverse_transform(predicted) return inverse_pred def background_thread(): """Example of how to send server generated events to clients."""
def __init__(self): # Authenticate to the twitter api self.auth = tweepy.AppAuthHandler(os.environ['TWITTER_API_KEY'], os.environ['TWITTER_API_SECRET']) self.api = tweepy.API(self.auth, wait_on_rate_limit=True)
import tweepy # Replace the API_KEY and API_SECRET with your application's key and secret. auth = tweepy.AppAuthHandler( "sFH28qEj9uf9zWt8Ecws9h8jS", "0lkJlb2jBVNeahQ40EZs84W7mhjmXLWF12AGQpqDBI8yj1pffY") api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) if (not api): print("Can't Authenticate") sys.exit(-1) # Continue with rest of code import sys import jsonpickle import os searchQuery = ['Hillary', 'Clinton', '#imwithher'] # this is what we're searching for maxTweets = 10000000 # Some arbitrary large number tweetsPerQry = 100 # this is the max the API permits fName = 'hillary_tweets.txt' # We'll store the tweets in a text file. # If results from a specific ID onwards are reqd, set since_id to that ID. # else default to no lower limit, go as far back as API allows sinceId = 725171200100433920 # If results only below a specific ID are, set max_id to that ID. # else default to no upper limit, start from the most recent tweet matching the search query. #max_id = -1L
def get_data():
    """Download recent English tweets mentioning each company listed in
    companies.txt and write them all to raw_tweets.csv.

    Side effects: reads credential and company files from fixed local
    paths, calls the Twitter search API, writes one CSV on disk.
    Returns None.
    """
    # ATOKEN = open("/Users/mmiyazaki/Documents/My project/Airline_analysis/src/get_data/credentials/atoken.txt","r").read()
    # ASECRET = open("/Users/mmiyazaki/Documents/My project/Airline_analysis/src/get_data/credentials/asecret.txt","r").read()
    CKEY = open(
        "/Users/mmiyazaki/Documents/My project/Airline_analysis/src/get_data/credentials/ckey.txt",
        "r").read()
    CSECRET = open(
        "/Users/mmiyazaki/Documents/My project/Airline_analysis/src/get_data/credentials/csecret.txt",
        "r").read()

    # One company name per line.
    with open(
            "/Users/mmiyazaki/Documents/My project/Airline_analysis/src/data/companies.txt"
    ) as file_in:
        companies = [line.replace('\n', "") for line in file_in]

    auth = tweepy.AppAuthHandler(CKEY, CSECRET)
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    if (not api):
        print("Can't Authenticate")
        sys.exit(-1)

    # Fixed: rows are accumulated in a plain list and turned into a
    # DataFrame once at the end. The original called df.append() per
    # tweet, which is O(n^2) and removed in pandas >= 2.0.
    rows = []
    for company in companies:
        print(company)
        searchQuery = company + " -RT"  # this is what we're searching for
        print("search query :", searchQuery)
        maxTweets = 200  # per-company cap on downloaded tweets
        tweetsPerQry = 100  # this is the max the API permits
        # tweepy_REST_API = dataiku.Folder("tweepy_REST_API")
        # folder_path = tweepy_REST_API.get_path()
        # fName = folder_path + '/tweets.txt'  # We'll store the tweets in a text file.

        # If results from a specific ID onwards are reqd, set since_id to that ID.
        # else default to no lower limit, go as far back as API allows
        sinceId = None
        # If results only below a specific ID are, set max_id to that ID.
        # else default to no upper limit, start from the most recent tweet matching the search query.
        max_id = -1
        tweetCount = 0
        print("Downloading max {0} tweets".format(maxTweets))
        while tweetCount < maxTweets:
            try:
                new_tweets = _search_page(api, searchQuery, tweetsPerQry,
                                          max_id, sinceId)
                if not new_tweets:
                    print("No more tweets found")
                    break
                for tweet in new_tweets:
                    rows.append(_tweet_row(tweet._json, company))
                tweetCount += len(new_tweets)
                print("Downloaded {0} tweets".format(tweetCount))
                max_id = new_tweets[-1].id
            except tweepy.TweepError as e:
                # Just exit if any error
                print("some error : " + str(e))
                break
        print("Downloaded {0} tweets for {1}".format(tweetCount, company))

    df = pd.DataFrame(rows)
    df.to_csv(
        "/Users/mmiyazaki/Documents/My project/Airline_analysis/src/data/raw_tweets.csv"
    )


def _search_page(api, searchQuery, tweetsPerQry, max_id, sinceId):
    """Fetch one page of English search results, honouring the optional
    max_id (pagination cursor) and sinceId (lower bound) constraints."""
    kwargs = {'q': searchQuery, 'count': tweetsPerQry, 'lang': "en"}
    if max_id > 0:
        kwargs['max_id'] = str(max_id - 1)
    if sinceId:
        kwargs['since_id'] = sinceId
    return api.search(**kwargs)


def _tweet_row(t, company):
    """Flatten one tweet JSON dict into a flat row dict for the CSV."""
    # Fixed: comparisons with None now use `is`/`is not` instead of ==/!=.
    if len(t['entities']['urls']) == 0:
        links = ""
    else:
        links = t['entities']['urls'][0]["expanded_url"]
    country = "" if t["place"] is None else t["place"]['country']
    coordinates = str(t["geo"]['coordinates']) if t["geo"] is not None else ""
    return {
        'timestamp': t['created_at'],
        'tweet_id': t["id"],
        'text': t["text"],
        'hashtags': t["entities"]["hashtags"],
        "links": links,
        # 'user_mentions':t["entities"]["user_mentions"][0]["screen_name"],
        # 'user_mentions_id':t["entities"]["user_mentions"][0]["id"],
        # 'user_mentions_indices':[t["entities"]["user_mentions"][0]["indices"]],
        'in_reply_to_status_id': t["in_reply_to_status_id"],
        'in_reply_to_user_id': t["in_reply_to_user_id"],
        'in_reply_to_screen_name': t["in_reply_to_screen_name"],
        'user_id': t["user"]["id"],
        'username': t["user"]["name"],
        'screen_name': t["user"]["screen_name"],
        'user_location': t["user"]["location"],
        'followers_count': t["user"]["followers_count"],
        'friends_count': t["user"]["friends_count"],
        'user_creation': t["user"]["created_at"],
        'favourites_count': t["user"]["favourites_count"],
        'coordinates': coordinates,
        # 'geo':t["geo"],
        'country': country,
        'retweets': t["retweet_count"],
        'retweeted': t["retweeted"],
        'lang': t["lang"],
        'company': company,
    }