def download_tweets(self):
    """Fetch recent English #ExtinctionRebellion tweets and store them.

    Authenticates against the Twitter API via tweepy, searches for the
    hard-coded hashtag in extended mode, skips retweets, and persists each
    remaining tweet through SQLData.
    """
    # Twitter API authentication (credentials redacted).
    consumer_key = "***"
    consumer_secret = "***"
    access_token = "***"
    access_token_secret = "***"
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    # Search term and maximum number of results to fetch.
    search_term = "#ExtinctionRebellion"
    quantity_tweets = 100

    # Extended tweet mode so `full_text` is populated.
    self.tweets = tweepy.Cursor(
        api.search, q=search_term, lang="en", tweet_mode="extended"
    ).items(quantity_tweets)

    # Persist the received tweets and their attributes to the database.
    sql = SQLData()
    for api_tweet in self.tweets:
        # Skip retweets (text beginning with "RT").
        if not api_tweet.full_text.startswith("RT"):
            # Fix: take a single timestamp so the created/updated columns
            # match exactly (the original called datetime.now() twice,
            # producing two slightly different values).
            now = str(datetime.datetime.now())
            tweet = Tweet(
                api_tweet.id,
                api_tweet.created_at,
                now,
                now,
                api_tweet.user.screen_name,
                api_tweet.full_text,
            )
            sql.insert_tweet(tweet)
        else:
            print(f"Tweet mit der ID={api_tweet.id} ist ein Retweet")
def display_create_tweet():
    """Handle the tweet-creation page.

    GET: render the creation form.
    POST: build a Tweet from the submitted form (author, content, optional
    image upload), prepend it to the global tweet list, and redirect to
    the tweet listing.
    """
    # GET request: show the creation form.
    if request.method == 'GET':
        return render_template('create_tweet.html')
    else:
        # POST request: create a new tweet from the form body.
        authorName = request.form['author']
        content = request.form['content']
        # Default: no image attached.
        image = None
        f = request.files['image']
        # An actual file was uploaded (empty filename means no upload).
        if f.filename != '':
            # Security fix: strip any directory components from the
            # client-supplied filename to prevent path traversal
            # (e.g. "../../app.py") when building the save path.
            safe_name = os.path.basename(f.filename)
            # Destination path under static/uploads.
            filepath = os.path.join(app.root_path, 'static', 'uploads', safe_name)
            f.save(filepath)
            # Public URL for displaying the saved image.
            image = url_for('static', filename='uploads/' + safe_name)
        # Build the tweet (constructor lives in tweet.py) and prepend it.
        tweet = Tweet(authorName, content, image)
        tweets.insert(0, tweet)
        # Back to the tweet list.
        return redirect(url_for('display_tweets'))
def collect_tweets(self):
    """Parse every raw record in ``self.f`` into a Tweet and accumulate
    its features on this instance.

    Collected across tweets: timings, html text, app source, retweet
    source, expanded URLs, display-URL domains, mention ids and hashtag
    texts. These feed later per-tweet metrics such as hashtags / mentions /
    URLs per tweet, duplicate ratios (1 - unique/total for URLs, domains,
    mentions, hashtags) and inter-tweet content similarity (bag of words
    with Jaccard and cosine similarity).
    """
    for raw in self.f:
        parsed = Tweet()
        parsed.get_features(raw)
        self.tweets.append(parsed)
        self.tweet_timings.append(parsed.date)
        self.tweet_text.append(parsed.html_text)
        self.app_sources.append(parsed.source)
        self.retweet_sources.append(parsed.rts)
        # URL features: full expanded URL plus the bare domain portion.
        self.urls.extend(u['expanded_url'] for u in parsed.urls)
        self.domains.extend(u['display_url'].split('/')[0] for u in parsed.urls)
        self.mentions.extend(m['id'] for m in parsed.mentions)
        self.hashtags.extend(h['text'] for h in parsed.hashtags)
def tweets(self):
    """Wrap every record in the backing store as a Tweet; return the list."""
    return [Tweet(record) for record in self.db]
def get_tweets(self, count, lang, **kwargs):
    """ Get's tweets from twitter and returns them in a list.

    By default returns a single tweet in english. To change language,
    pass the language code in the parameters.

    Args:
        count: Number of tweets to be returned.
        lang: Language flag by default its set to english (en),
            pass the language code to change language.
        **kwargs: Arbitrary keyword arguments; a "filters" key switches
            to the filtered stream.

    Returns:
        List of tweets, in json format.
    """
    tweets = []
    try:
        if "filters" in kwargs:
            stream = self._get_filter(kwargs["filters"], kwargs)
        else:
            stream = self._get_iterator()
        for tweet in stream:
            tweets.append(Tweet(json.dumps(tweet)))
            # Fix: the original tested `count - 1 <= 0`, which never
            # changes across iterations — for count > 1 the loop only
            # ended when the stream did. Stop once `count` tweets are
            # collected.
            if len(tweets) >= count:
                break
        return tweets
    except Exception:
        # Re-raise with the original traceback intact (`raise e` reset it).
        raise
def download_tweets(query_phrase, tweet_count):
    """Download up to `tweet_count` English tweets matching `query_phrase`.

    Returns a list of Tweet(full_text, timestamp) objects; for retweets
    the original tweet's full text is used. Returns None when the API
    client could not be created.
    """
    import api_keys
    import tweepy
    from datetime import datetime
    from tweet import Tweet

    # authorization (app-auth handler)
    auth = tweepy.AppAuthHandler(api_keys.API_KEY, api_keys.API_SECRET)
    api = tweepy.API(
        auth,
        wait_on_rate_limit=True,         # wait until the limit is replenished
        wait_on_rate_limit_notify=True)  # reply with a message if the limit is reached

    # check if not authorized
    if (not api):
        print("Can't Authenticate")
        return

    tweets = []
    for status in tweepy.Cursor(api.search, q=query_phrase,
                                tweet_mode='extended',
                                lang='en').items(tweet_count):
        # Fix: catch only the KeyError raised when the status is not a
        # retweet, instead of a bare `except` that swallowed every error
        # (including typos and KeyboardInterrupt).
        try:
            full_text = status._json['retweeted_status']['full_text']
        except KeyError:
            full_text = status._json['full_text']
        ts = datetime.strptime(status._json['created_at'],
                               '%a %b %d %H:%M:%S +0000 %Y')
        tweets.append(Tweet(full_text, ts))
    return tweets
def _get_tweets_by_query(self, query: str, searchKey: str) -> list:
    """Execute `query` against the SQLite cursor and map rows to Tweets.

    Args:
        `query` (str): SQLite query that will be executed
            (e.g. `"SELECT * FROM Tweet"`)
        `searchKey` (str): search key attached to every returned Tweet

    Returns:
        A list of Tweet instances built from the result rows.
    """
    self.c.execute(query)
    return [
        Tweet(tweet_id=row[0],
              writer=row[1],
              post_date=time.localtime(row[2]),
              body=row[3],
              searchKey=searchKey,
              comment_num=row[4],
              retweet_num=row[5],
              like_num=row[6])
        for row in self.c.fetchall()
    ]
def post(self):
    """Handle tweet create/update form submission, then redirect home.

    When `share_type` is "Update" the referenced tweet's text is edited in
    place; otherwise a new Tweet entity is created and linked to the
    logged-in user.
    """
    self.response.headers["Content-Type"] = "text/html"
    share_text = self.request.get("share_text")
    share_image = self.request.get("share_image")  # read but currently unused
    # Fix: the original condition `share_text != None or share_text != ""`
    # is a tautology (every value differs from at least one of the two),
    # so empty posts were created. Require non-empty text instead.
    if share_text:
        share_type = self.request.get("share_type")
        if share_type == "Update":
            # Edit an existing tweet's text in place.
            edit_tweet_id = self.request.get("edit_tweet_id")
            edit_tweet = Services().get_tweet(tweet_id=edit_tweet_id)
            edit_tweet.share_text = share_text
            edit_tweet.put()
        else:
            # Create a new tweet owned by the logged-in user.
            myuser = Services().get_login_user()
            tweet = Tweet(share_text=share_text,
                          user_id=myuser.key.id(),
                          user_name=myuser.user_name,
                          time=datetime.datetime.now())
            tweet.put()
            myuser.tweet_ids.append(tweet.key.id())
            myuser.put()
    self.redirect("/")
def tweetScraper(driver):
    """Scrape all visible tweets from the current Selenium page.

    Parses the page source with BeautifulSoup and extracts text, screen
    name, username, tweet id, user id and the tweet date for each tweet
    div.

    Returns a list of Tweet objects; on any scraping error an empty (or
    partial) list is returned and the driver is quit.
    """
    # Fix: initialise before the try block — previously an exception early
    # in the body left tweetList unbound and the final return raised
    # NameError instead of returning gracefully.
    tweetList = []
    try:
        tweeterDivs = driver.page_source
        obj = BeautifulSoup(tweeterDivs, "html.parser")  # extracting the page source
        allTweets = obj.find_all("div", class_="tweet")
        for tweet in allTweets:
            tweetText = tweet.find(
                "p", class_="tweet-text").getText()  # extracting tweet text
            screenName = tweet.find(
                "strong", class_="fullname").getText()  # extracting screen name
            username = tweet.find(
                class_="username").getText()  # extracting username
            tweetId = tweet['data-tweet-id']  # extracting tweet id
            userId = tweet['data-user-id']  # extracting user id
            timestamp = tweet.find(
                class_="tweet-timestamp")['title']  # extracting tweet time
            # Title is formatted "<time> - <day> <Mon> <year>"; keep the date.
            timestamp = datetime.datetime.strptime(
                timestamp.split('-')[1], " %d %b %Y")
            timestamp = str(timestamp.date())
            tweetList.append(
                Tweet(tweetId, tweetText, username, userId, screenName,
                      timestamp))
    except Exception:
        print("Something went wrong!")
        driver.quit()
    return tweetList
def getTestdata(search="*", count=1, emojis=None):
    """Fetch `count` recent English tweets matching `search`, run each
    through Tweet preprocessing, and print it.

    Retweets are unwrapped to the original tweet's full text.
    """
    # SECURITY: credentials are hard-coded in source; they belong in
    # configuration / environment variables and should be rotated.
    auth = tweepy.OAuthHandler(
        "v22l2KMtXLJY3ZTiEpNyRyLUj",
        "2GCvf1ul33i0eyGyNq6Uo6oWeSL4gmUfyghnlFKHxMU9D0SyuL")
    auth.set_access_token("887755823522340865-8F9qeIWfm6fzYPpI4mJVXXq1iuFgCcm",
                          "VgLvgj015uajs3vzHdX3vSi3jIPNfZP03flzI7CIjOtqk")
    api = tweepy.API(auth)
    # Fix: the original unconditionally reassigned `search = "sad"`,
    # silently discarding the caller's query. The parameter is now honored.
    itemlimit = count
    for status in tweepy.Cursor(api.search, lang="en", q=search,
                                tweet_mode="extended",
                                since_id=1).items(itemlimit):
        # Prefer the original tweet's full text when this is a retweet.
        if "retweeted_status" in dir(status):
            tweet = status.retweeted_status.full_text
        else:
            tweet = status.full_text
        t1 = Tweet(tweet)
        t1.processTweet(emojis=emojis)
        t1.printer()
def getTraindata(bpfile="Datasets/Train/Sentiment Analysis Dataset.csv",
                 mpfile="Datasets/Train/smileannotationsfinal.csv",
                 mode="mp", emojis=None):
    """Load labelled training tweets from a CSV file.

    Mode "mp" selects the multi-class SMILE file (`mpfile`); any other
    mode selects the binary-polarity file (`bpfile`). Lines whose label
    is not in Emotions are skipped.

    Returns:
        List of processed Tweet objects.
    """
    mpdata = []
    if mode == "mp":
        file = mpfile
    else:
        file = bpfile
    # Fix: context manager guarantees the handle is closed even when
    # parsing raises (the original paired open()/close() manually).
    with open(file, "r") as fp:
        for line in fp:
            tokens = line.split(',')
            labels = tokens[2].split('|')
            # Keep only lines with a recognised emotion label.
            if labels[0] in Emotions:
                label = Emotions[labels[0]]
                t1 = Tweet(tokens[1], label)
                t1.processTweet(emojis=emojis)
                mpdata.append(t1)
    return mpdata
def fetch_tweet(self, tweet_id: int):
    """Load one tweet hash from Redis and build a Tweet from its fields."""
    key = 'tweet:' + str(tweet_id)
    fields = self.r.hgetall(key)
    return Tweet(
        fields.get('user_id'),
        fields.get('timestamp'),
        fields.get('tweet_txt'),
    )
def createATweet(self, tweet):
    """Wrap `tweet` in a Tweet instance; print and return None on failure."""
    try:
        return Tweet(tweet)
    except Exception as e:
        print(e)
        return None
def get_tweets(search_string, result_type, time_range):
    """Query the Twitter search API and return matching tweets.

    Builds a raw search query from the arguments and maps each JSON
    result onto a Tweet, copying retweet/favorite counts when present.

    Returns:
        Dict mapping a 0-based index to Tweet objects.
    """
    api = twitter.Api(
        consumer_key='7SVoyHlwYgm90Y5HSzmTzUQ9O',
        consumer_secret='D0crcrKca9S3TXuqGRPYhzBN0LJut34MecER8Ly8fb3xrM0Gja',
        access_token_key='1199488399238926336-1jV9xq8bs4zdP5qiq96cUwP5GF1Fuz',
        access_token_secret='71hibaH8BPkPwCytm5CH9N4RJonaRCrSKUqG9y3dwo2Ix')
    raw_query = ("q=" + str(search_string) + "%20&result_type=" +
                 str(result_type) + "&since=" + str(time_range) + "&count=100")
    print(raw_query)
    tweets = {}
    for index, result in enumerate(api.GetSearch(raw_query=raw_query)):
        data = json.loads(str(result))
        entry = Tweet(data['user']['profile_image_url'],
                      data['user']['name'],
                      data['text'],
                      data['created_at'],
                      data['hashtags'])
        if 'retweet_count' in data:
            entry.retweet_count = data['retweet_count']
        if 'favorite_count' in data:
            entry.favorite_count = data['favorite_count']
        tweets[index] = entry
    return tweets
def _cast_row_to_tweet(self, row):
    """Build a Tweet from a DB row: the first six columns feed the
    constructor, the remaining four fill derived attributes."""
    tweet = Tweet(row[0], row[1], row[2], row[3], row[4], row[5])
    tweet.label, tweet.clean_text = row[6], row[7]
    tweet.tb_polarity, tweet.nb_polarity = row[8], row[9]
    return tweet
def average_degree(self):
    """Stream tweets from self.input_file, maintain a rolling hashtag
    co-occurrence graph, and write the graph's average degree to
    self.output_file after each tweet (one "%0.2f" value per line).

    When self.tracker_en is set, also records per-tweet average/peak
    degree history; when self.self_checking is set, validates the graph
    after each tweet. (Python 2 code.)
    """
    try:
        stats_fh = open(self.output_file, 'w')
    except IOError:
        # NOTE(review): execution continues after this print; a failed
        # open leaves stats_fh unbound and the later write raises
        # NameError — confirm whether aborting here was intended.
        print 'Cannot open', self.output_file
    try:
        tweet_fh = open(self.input_file)
    except IOError:
        print 'Cannot open', self.input_file
    else:
        tweets = tweet_fh.readlines()
        graph = tweet_graph()
        for tweet_line in tweets:
            tweet_dec = json.loads(tweet_line)
            # Ignore tweets with "limit" (rate-limit notices in the stream)
            if "limit" in tweet_dec.keys():
                continue
            cur_tweet = Tweet(tweet_dec)
            hashtags = cur_tweet.get_hashtags()
            cur_ts = datetime.strptime(cur_tweet.get_timestamp(), self.format)
            # Ignore tweets with one or zero hashtags — they add no edges;
            # their timestamp is only used to evict old tweets from the graph
            if (len(hashtags) >= 2):
                for hashtag in hashtags:
                    graph.add_vertex(hashtag, cur_ts)
                edges = self.pairwise(hashtags)
                for edge in edges:
                    graph.add_edge(graph.get_vertex(edge[0]),
                                   graph.get_vertex(edge[1]), cur_ts)
            else:
                graph.evict(cur_ts)
            # Emit the rolling average degree after every processed tweet.
            av_degree = graph.average_degree()
            stats_fh.write(("%0.2f" % av_degree) + "\n")
            if self.tracker_en:
                self.ad_tracker.append(av_degree)
                (peak_degree, peak_node) = graph.peak_degree()
                self.pd_tracker.append(peak_degree)
                self.pn_tracker.append(peak_node)
            if self.self_checking:
                if not graph.check_graph(cur_ts):
                    print "Self Checking Failed at " + str(cur_ts)
        tweet_fh.close()
    if not stats_fh.closed:
        stats_fh.close()
def test_filter1(self):
    """Baseline sanity check: a Tweet stores its message verbatim."""
    sample = Tweet("@trapkingwillie", 1, 12, "PM",
                   "Eastern Time (US & Canada)", 10, 15,
                   "This should pass", "n/a")
    self.assertEqual(sample.msg, "This should pass")
def make_post(tweet_data, curr_user):
    """Create a tweet from parsed command data and fan it out.

    tweet_data layout:
        tweet_data[0] == "POST": menu command marker
        tweet_data[1] == author of tweet
        tweet_data[2] == tweet content string
        tweet_data[3] == hashtags separated by spaces

    The new tweet is appended to the author's list, the global list, and
    each subscribed follower's list (offline followers get it queued in
    their offline_tweets instead).
    """
    content = tweet_data[2]
    hashtags = tweet_data[3].split()
    new_tweet = Tweet(curr_user, content, hashtags)
    # Author's own timeline and the global feed.
    curr_user.tweets.append(new_tweet)
    allTweets.append(new_tweet)
    # Deliver to subscribed followers; queue for offline ones.
    for follower in tweeterlist:
        if follower not in curr_user.subs:
            continue
        if follower.status is False:
            follower.offline_tweets.append(new_tweet)
        else:
            follower.tweets.append(new_tweet)
    return "SUCCESS:POST"
def lambda_handler(event, context):
    """AWS Lambda entry point: post a random word to a Twitter account.

    Pulls candidate words from the database, avoids the last twenty
    timeline posts, and tweets the chosen word. Any failure is logged
    rather than propagated (Lambda-friendly).
    """
    try:
        words = transform_data_from_database(get_words_from_database())
        twitter = Tweet(get_twitter_account_info())
        recent_posts = twitter.get_last_twenty_posts()
        # Choose a word that hasn't been posted recently.
        message = get_random_word(words, recent_posts)
        print(f'message to post = {message}')
        twitter.create_new_post(message)
    except Exception as error:
        message = f'Error occurred during invocation of lambda function. Error = {error}'
        print(message)
def fetch(query, output_file=sys.stdout, debug_file=None, lang="en",
          geocode="", max_count=500000):
    '''
    Fetches query results into output_files, and prints raw json results into debug_file
    '''
    auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    api = API(auth, retry_count=10, retry_delay=15, timeout=60,
              wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    # Header line recording the query and destination.
    print("QUERY:[", query, "]", "OUTPUT:", output_file.name, file=output_file)
    count = 0
    ok_count = 0
    for result in Cursor(api.search, q=query, lang=lang).items(max_count):
        if debug_file:
            print(result.text + "\n", file=debug_file)
        tweet = Tweet(result.text)
        cleaned = tweet.preprocess()
        # Keep only preprocessable, hashtag-tagged tweets.
        if cleaned and tweet.isTagged():
            print(",".join(tweet.hashtags) + "\t" + cleaned, file=output_file)
            ok_count += 1
        count += 1
        # Progress report every thousand tweets.
        if count % 1000 == 0:
            print("tweets saved:", ok_count, "/", count)
    print("Loop end:", ok_count, "/", count, "tweets saved")
def loadTweets(filename): """ Load tweets from a filename. Returns a list of Tweet objects. """ tweets = open(filename, 'r').read().splitlines() print "Loading %d tweets from %s ..." % (len(tweets), filename) tweetObjects = [] for tweet in tweets: try: js = json.loads(tweet) if (not ('place' in js)) or js['place'] == None: continue elif (not ('full_name' in js['place'])): continue elif (not ('geo' in js)) or js['geo'] == None: continue elif (not ('coordinates' in js['geo'])): continue coords = js['geo']['coordinates'] place = js['place'] tweetObject = Tweet(js['text'], place['full_name'], coords[0], coords[1], place['country'], js['created_at']) tweetObjects.append(tweetObject) except ValueError: pass print "Loaded %d tweets" % len(tweetObjects) return tweetObjects
def main():
    """Interactive driver: prompt for a user name, download that user's
    tweets to a JSON file, then let the user browse them.
    """
    t = Tweet()
    t.prompt()
    try:
        # validate the user name
        t.validateUser(t.userName)
        t._clear()
        # obtain user tweets
        t.getTweets(t.userName, t.rt)
        # check if file exists: create if it doesn't, truncate if it does
        t.exist(t.jsonFile)
        # dump to json file
        t.dumpJson(t.jsonFile)
        print("Complete!!")
        sleep(0.5)
        t._clear()
        # welcome user
        print(user_prompts.welcome.format(t.userName))
        # ask user what to view
        t.view()
    except tweepy.TweepError as err:
        # Fix: bind the exception to a distinct name — the original used
        # `as t`, shadowing the Tweet instance `t`.
        print(err.args[0])
def tweets(self, limit=10):
    """Return up to `limit` tweets, sorted by 'received_at' descending."""
    cursor = self.collection.find().sort('received_at', desc).limit(limit)
    return [Tweet(doc) for doc in cursor]
def getTestdata(search, count=1, emojis=None):
    """Fetch `count` recent English tweets matching `search`, preprocess
    and print each via Tweet, and return the last processed Tweet.
    """
    # NOTE(review): API credentials are hard-coded in source; move them to
    # configuration/environment variables and rotate them.
    auth = tweepy.OAuthHandler("VzAcT7Kf0gAiYBwN9CeKQolqk",
                               "oU80dLgLahfNHS0b7pUZv3EC4MeRZ1UHnpMVbluAlLlSYot4Y0")
    auth.set_access_token("810807258149949440-WWflrBW2sY7iruVQLjN70dCcn1BUoCf",
                          "mDG1BDjVEMJO4QEch1bUEjnsWuvbhqHeQVRi4wwveTgB0")
    api = tweepy.API(auth)
    # Map all non-BMP code points (e.g. emoji) to U+FFFD so the text is
    # safe for consoles/narrow builds.
    non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
    #search = "happy"
    itemlimit = count
    for status in tweepy.Cursor(api.search, lang="en", q=search,
                                tweet_mode="extended",
                                since_id=1).items(itemlimit):
        # process status here
        # print status.entities["hashtags"]
        # Prefer the original tweet's full text when this is a retweet.
        if "retweeted_status" in dir(status):
            tweet = status.retweeted_status.full_text
        else:
            tweet = status.full_text
        t1 = Tweet(tweet.translate(non_bmp_map))
        t1.processTweet(emojis=emojis)
        t1.printer()
    # NOTE(review): only the last fetched tweet is returned; if the search
    # yields no tweets this raises NameError — confirm intended behavior.
    return t1
def index():
    """Search endpoint instrumented with Prometheus metrics.

    POST: rank the corpus by cosine similarity to the submitted query and
    render the top 20 matching tweets. GET: render the empty search page.
    """
    REQUESTS.inc()
    with EXECPTIONS.count_exceptions():
        LAST.set(time.time())
        INPROGRESS.inc()
        start = time.time()
        # Fix: the original wrapped each branch in `try: ... except:
        # raise Exception`, replacing every error (and its traceback)
        # with a bare, message-less Exception. Errors now propagate
        # unchanged; EXECPTIONS.count_exceptions() still records them.
        if request.method == 'POST':
            query = request.form['query']
            query_vec = vectorizer.transform(
                [query])  # (n_docs,x),(n_docs,n_Feats)
            results = cosine_similarity(X, query_vec).reshape(
                (-1, ))  # Cosine similarity with each doc
            tweets = []
            # Top 20 documents, most similar first.
            for i in results.argsort()[-20:][::-1]:
                tweets.append(
                    Tweet(df.iloc[i, 0], df.iloc[i, 2], df.iloc[i, 3]))
            INPROGRESS.dec()
            lat = time.time()
            LATENCY.observe(lat - start)
            return render_template('Home.html', query=query, tweets=tweets)
        # GET (or non-POST) path: render the empty page.
        INPROGRESS.dec()
        lat = time.time()
        LATENCY.observe(lat - start)
        LATENCY_HIS.observe(lat - start)
        return render_template('Home.html')
def getTraindata(bpfile="Datasets/Train/Sentiment Analysis Dataset.csv",
                 mpfile="Datasets/Train/smileannotationsfinal.csv",
                 mode="mp", emojis=None):
    """Load labelled training tweets from a CSV file.

    Mode "mp" selects the multi-class SMILE file (`mpfile`); any other
    mode selects the binary-polarity file (`bpfile`). Lines whose label
    is not in Emotions are skipped.

    Returns:
        List of processed Tweet objects.
    """
    mpdata = []
    # Fix: the original selected mpfile when mode == "bp" and bpfile
    # otherwise — inverted relative to the parameter names (and to the
    # sibling loader, which maps "mp" -> mpfile).
    if mode == "mp":
        file = mpfile
    else:
        file = bpfile
    i = 1
    # Fix: context manager guarantees the handle is closed.
    with open(file, encoding="utf-8", errors="ignore") as fp:
        for line in fp:
            tokens = line.split(',')
            labels = tokens[2].split('|')
            # Keep only lines with a recognised emotion label.
            if labels[0] in Emotions:
                label = Emotions[labels[0]]
                t1 = Tweet(tokens[1], label)
                t1.processTweet(emojis=emojis)
                mpdata.append(t1)
                i = i + 1
    print("Number of data", i)
    return mpdata
def __init__(self, filename):
    """Load tweets from `filename` (CSV, one tweet per row) and compute
    sentiment statistics.

    Sets:
        self.tweets: all Tweet objects in file order.
        self.sentimentfractions: [positive, negative, neutral] proportions.
        self.positivesorted / self.negativesorted: tweets sorted by
            descending / ascending sentiment.
    """
    tweets = []
    positive_count = 0
    negative_count = 0
    with open(filename) as f:
        reader = csv.reader(f)
        for row in reader:
            tweet = Tweet(row)
            tweets.append(tweet)
            if tweet.sentiment > 0:
                positive_count += 1
            elif tweet.sentiment < 0:
                negative_count += 1
    # Fix: guard against an empty file — the original divided by
    # len(tweets) unconditionally and raised ZeroDivisionError.
    if tweets:
        positiveproportion = positive_count / len(tweets)
        negativeproportion = negative_count / len(tweets)
        neutralproportion = 1 - positiveproportion - negativeproportion
    else:
        positiveproportion = negativeproportion = neutralproportion = 0.0
    self.sentimentfractions = [
        positiveproportion, negativeproportion, neutralproportion
    ]
    self.tweets = tweets
    self.positivesorted = sorted(tweets,
                                 key=lambda i: i.sentiment,
                                 reverse=True)
    self.negativesorted = sorted(tweets, key=lambda i: i.sentiment)
def post(self):
    """Handle tweet create/update form submission, then redirect home.

    When `share_type` is "Update" the referenced tweet's text is edited in
    place; otherwise a new Tweet entity is created and linked to the
    logged-in user.
    """
    self.response.headers["Content-Type"] = "text/html"
    text_share = self.request.get("text_share")
    share_image = self.request.get("share_image")  # read but currently unused
    # Fix: the original condition `text_share != None or text_share != ""`
    # is a tautology (every value differs from at least one of the two),
    # so empty posts were created. Require non-empty text instead.
    if text_share:
        share_type = self.request.get("share_type")
        if share_type == "Update":
            # Edit an existing tweet's text in place.
            edit_tweet_id = self.request.get("edit_tweet_id")
            edit_tweet = Definitions().get_tweet(tweet_id=edit_tweet_id)
            edit_tweet.text_share = text_share
            edit_tweet.put()
        else:
            # Create a new tweet owned by the logged-in user.
            myuser = Definitions().get_login_user()
            tweet = Tweet(text_share=text_share,
                          user_id=myuser.key.id(),
                          user_name=myuser.user_name,
                          time=datetime.datetime.now())
            tweet.put()
            myuser.tweets_id.append(tweet.key.id())
            myuser.put()
    self.redirect("/")
def readTweets(tweetIDs, folder, label):
    """
    Returns a dictionary containing tweets to the given IDs.
    Reads from a csv file (name = label.csv); each line is one tweet with
    tab-separated fields, where field 3 holds the tweet ID. Only IDs
    present in `tweetIDs` are kept.
    """
    tag_dict = loadTagDictionary()
    with codecs.open(folder + label + ".csv", 'r',
                     encoding='utf-8') as tweetFile:
        lines = tweetFile.read().split("\n")[:-1]
    # Index the raw tab-separated fields by tweet ID, keeping wanted IDs.
    tweet_dict = {}
    for raw in lines:
        fields = raw.strip().split("\t")
        tid = fields[3].strip()
        if tid in tweetIDs:
            tweet_dict[tid] = fields
    return {
        tid: Tweet(tag_dict[tid], rawTweet=fields, label=label)
        for tid, fields in tweet_dict.items()
    }
def load_tweet_corpus(csv_dir):
    """Load every tab-separated CSV in `csv_dir` into a sorted list of
    Tweet objects. Rows are [id, text, hashtags, timestamp, retweeted?];
    hashtags are comma-separated, with empty and forbidden tags dropped.
    (Python 2 code: print statements, long().)
    """
    corpus = []
    for file in os.listdir(csv_dir):
        print 'Processing ' + file + '...'
        with open(os.path.join(csv_dir, file), 'rb') as csv_f:
            csv_reader = csv.reader(csv_f,
                                    delimiter='\t',
                                    quotechar='|',
                                    quoting=csv.QUOTE_MINIMAL)
            for row in csv_reader:
                #returns a row as a list [id, text, hashtags, timestamp, retweeted?]
                hashtags = row[2].split(',')
                ht_set = set()
                if len(hashtags) == 1 and len(hashtags[0]) == 0:
                    #there are no hashtags in the current tweet
                    pass
                else:
                    for ht in hashtags:
                        # drop empty fragments and blacklisted hashtags
                        if len(ht) == 0 or ht in FORBIDDEN_HASHTAGS:
                            continue
                        ht_set.add(ht)
                # NOTE(review): bool(row[4]) is True for ANY non-empty
                # string, including "False" or "0" — confirm the column is
                # encoded empty-vs-nonempty, otherwise the retweet flag is
                # misread.
                corpus.append(
                    Tweet(long(row[0]), row[1].split(), ht_set, long(row[3]),
                          bool(row[4])))
    print 'Sorting...'
    corpus.sort()
    print 'Done.'
    return corpus