def main():
    with open(filename, 'rb') as csvfile:
        csvfile.readline()
        reader = csv.reader(csvfile)
        for row in reader:
            t = Tweet(row[0], row[1], row[2], row[3], row[4], row[5], row[6],
                      int(row[7]), int(row[8]), int(row[9]))
            tweets.append(t)

    top_retweets = sorted(tweets, key=lambda t: t.get_retweets(), reverse=True)
    top_replies = sorted(tweets, key=lambda t: t.get_replies(), reverse=True)
    top_favs = sorted(tweets, key=lambda t: t.get_favs(), reverse=True)
    data = [top_retweets, top_replies, top_favs]

    print "Welcome to the Twitter Metrics Parser"
    print "1) Top RTs"
    print "2) Top Replies"
    print "3) Top Favs"
    print ""

    selection = int(raw_input("Please select what data you are interested in: "))
    print ""
    while selection > len(data) or selection < 1:
        print "*** That is not a valid selection ****"
        selection = int(raw_input("Please select what data you are interested in: "))

    lst = data[selection - 1]
    for i in range(top_count):
        print "--- #" + str(i + 1) + " ---"
        print lst[i]

def get_tweets(search_string, result_type, time_range):
    api = twitter.Api(
        consumer_key='7SVoyHlwYgm90Y5HSzmTzUQ9O',
        consumer_secret='D0crcrKca9S3TXuqGRPYhzBN0LJut34MecER8Ly8fb3xrM0Gja',
        access_token_key='1199488399238926336-1jV9xq8bs4zdP5qiq96cUwP5GF1Fuz',
        access_token_secret='71hibaH8BPkPwCytm5CH9N4RJonaRCrSKUqG9y3dwo2Ix')
    tweets = {}
    search = "q=" + str(search_string) + "%20&result_type=" + str(result_type) + \
             "&since=" + str(time_range) + "&count=100"
    print(search)
    results = api.GetSearch(raw_query=search)
    i = 0
    for result in results:
        json_result = json.loads(str(result))
        t = Tweet(json_result['user']['profile_image_url'],
                  json_result['user']['name'],
                  json_result['text'],
                  json_result['created_at'],
                  json_result['hashtags'])
        if 'retweet_count' in json_result:
            t.retweet_count = json_result['retweet_count']
        if 'favorite_count' in json_result:
            t.favorite_count = json_result['favorite_count']
        tweets[i] = t
        i += 1
    return tweets

def getTraindata(bpfile="Datasets/Train/Sentiment Analysis Dataset.csv",
                 mpfile="Datasets/Train/smileannotationsfinal.csv",
                 mode="mp", emojis=None):
    mpdata = []
    bpdata = []
    if mode == "mp":
        file = mpfile
    else:
        file = bpfile
    fp = open(file, "r")
    for line in fp:
        tokens = line.split(',')
        labels = tokens[2].split('|')
        if labels[0] in Emotions:
            label = Emotions[labels[0]]
            t1 = Tweet(tokens[1], label)
            t1.processTweet(emojis=emojis)
            mpdata.append(t1)
    fp.close()
    return mpdata

def post(self):
    self.response.headers["Content-Type"] = "text/html"
    share_text = self.request.get("share_text")
    share_image = self.request.get("share_image")
    # Only accept a non-empty share_text.
    if share_text is not None and share_text != "":
        share_type = self.request.get("share_type")
        if share_type == "Update":
            edit_tweet_id = self.request.get("edit_tweet_id")
            edit_tweet = Services().get_tweet(tweet_id=edit_tweet_id)
            edit_tweet.share_text = share_text
            edit_tweet.put()
        else:
            myuser = Services().get_login_user()
            tweet = Tweet(share_text=share_text,
                          user_id=myuser.key.id(),
                          user_name=myuser.user_name,
                          time=datetime.datetime.now())
            tweet.put()
            myuser.tweet_ids.append(tweet.key.id())
            myuser.put()
    self.redirect("/")

def average_degree(self):
    try:
        stats_fh = open(self.output_file, 'w')
    except IOError:
        print 'Cannot open', self.output_file
    try:
        tweet_fh = open(self.input_file)
    except IOError:
        print 'Cannot open', self.input_file
    else:
        tweets = tweet_fh.readlines()
        graph = tweet_graph()
        for tweet_line in tweets:
            tweet_dec = json.loads(tweet_line)
            # Ignore tweets with "limit"
            if "limit" in tweet_dec.keys():
                continue
            cur_tweet = Tweet(tweet_dec)
            hashtags = cur_tweet.get_hashtags()
            cur_ts = datetime.strptime(cur_tweet.get_timestamp(), self.format)
            # Ignore tweets with one or zero hashtags;
            # such a tweet is only used to evict old tweets from the graph
            if len(hashtags) >= 2:
                for hashtag in hashtags:
                    graph.add_vertex(hashtag, cur_ts)
                edges = self.pairwise(hashtags)
                for edge in edges:
                    graph.add_edge(graph.get_vertex(edge[0]),
                                   graph.get_vertex(edge[1]), cur_ts)
            else:
                graph.evict(cur_ts)
            av_degree = graph.average_degree()
            stats_fh.write(("%0.2f" % av_degree) + "\n")
            if self.tracker_en:
                self.ad_tracker.append(av_degree)
                (peak_degree, peak_node) = graph.peak_degree()
                self.pd_tracker.append(peak_degree)
                self.pn_tracker.append(peak_node)
            if self.self_checking:
                if not graph.check_graph(cur_ts):
                    print "Self Checking Failed at " + str(cur_ts)
        tweet_fh.close()
    if not stats_fh.closed:
        stats_fh.close()

def fetch(query, output_file=sys.stdout, debug_file=None, lang="en", geocode="", max_count=500000):
    '''
    Fetches query results into output_file, and prints raw json results into debug_file
    '''
    auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    api = API(auth, retry_count=10, retry_delay=15, timeout=60,
              wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    print("QUERY:[", query, "]", "OUTPUT:", output_file.name, file=output_file)
    count = 0
    ok_count = 0
    for result in Cursor(api.search, q=query, lang=lang).items(max_count):
        if debug_file:
            print(result.text + "\n", file=debug_file)
        tweet = Tweet(result.text)
        t = tweet.preprocess()
        if t and tweet.isTagged():
            print(",".join(tweet.hashtags) + "\t" + t, file=output_file)
            ok_count += 1
        count += 1
        if count % 1000 == 0:
            print("tweets saved:", ok_count, "/", count)
    print("Loop end:", ok_count, "/", count, "tweets saved")

def getTraindata(bpfile="Datasets/Train/Sentiment Analysis Dataset.csv",
                 mpfile="Datasets/Train/smileannotationsfinal.csv",
                 mode="mp", emojis=None):
    mpdata = []
    bpdata = []
    if mode == "bp":
        file = mpfile
    else:
        file = bpfile
    fp = open(file, encoding="utf-8", errors="ignore")
    non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
    i = 1
    for line in fp:
        # line1 = line.translate(non_bmp_map)
        tokens = line.split(',')
        labels = tokens[2].split('|')
        if labels[0] in Emotions:
            label = Emotions[labels[0]]
            t1 = Tweet(tokens[1], label)
            t1.processTweet(emojis=emojis)
            # print(t1.text)
            mpdata.append(t1)
            i = i + 1
    print("Number of data", i)
    # print(mpdata)
    fp.close()
    # token = mpdata.split(',')
    # print(len(token))
    # print(len(mpdata))
    return mpdata

def collect_tweets(self):
    """
    # of hashtags per tweet
    # of tweets with hashtags
    # mentions per tweet
    # of tweets with mentions
    URLs per tweet
    Tweets with URLs
    # special characters per tweet
    # Tweets with special characters
    Retweets by user
    Inter-tweet content similarity: Bag of Words w/ Jaccard and cosine similarity
    Duplicate tweets
    Duplicate URLs ratio (1 - unique URLs/total URLs)
    Duplicate Domains Ratio (1 - unique domains/total domains)
    Duplicate Mentions Ratio (1 - unique mentions/total mentions)
    Duplicate hashtags ratio (1 - unique hashtags/total hashtags)
    """
    for twt in self.f:
        tweet = Tweet()
        tweet.get_features(twt)
        self.tweets.append(tweet)
        self.tweet_timings.append(tweet.date)
        self.tweet_text.append(tweet.html_text)
        self.app_sources.append(tweet.source)
        self.retweet_sources.append(tweet.rts)
        for url in tweet.urls:
            self.urls.append(url['expanded_url'])
            self.domains.append(url['display_url'].split('/')[0])
        for mention in tweet.mentions:
            self.mentions.append(mention['id'])
        for hashtag in tweet.hashtags:
            self.hashtags.append(hashtag['text'])

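# A minimal sketch (not from the original project) of how the duplicate ratios
# listed in the docstring above could be computed from the lists that
# collect_tweets() fills in. The helper name and the use of self.urls,
# self.mentions, and self.hashtags are assumptions based on those attributes.
def duplicate_ratio(items):
    """Return 1 - unique/total for a list of collected items (0.0 if empty)."""
    if not items:
        return 0.0
    return 1.0 - (len(set(items)) / float(len(items)))

# Hypothetical usage:
#   url_dup_ratio = duplicate_ratio(self.urls)
#   mention_dup_ratio = duplicate_ratio(self.mentions)
#   hashtag_dup_ratio = duplicate_ratio(self.hashtags)
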
def post(self):
    self.response.headers["Content-Type"] = "text/html"
    text_share = self.request.get("text_share")
    share_image = self.request.get("share_image")
    # Only accept a non-empty text_share.
    if text_share is not None and text_share != "":
        share_type = self.request.get("share_type")
        if share_type == "Update":
            edit_tweet_id = self.request.get("edit_tweet_id")
            edit_tweet = Definitions().get_tweet(tweet_id=edit_tweet_id)
            edit_tweet.text_share = text_share
            edit_tweet.put()
        else:
            myuser = Definitions().get_login_user()
            tweet = Tweet(text_share=text_share,
                          user_id=myuser.key.id(),
                          user_name=myuser.user_name,
                          time=datetime.datetime.now())
            tweet.put()
            myuser.tweets_id.append(tweet.key.id())
            myuser.put()
    self.redirect("/")

def lambda_handler(event, context):
    try:
        # Get data from the database
        raw_data = get_words_from_database()
        # Transform the raw data
        data = transform_data_from_database(raw_data)
        # Get the keys needed for the Twitter API
        access_keys = get_twitter_account_info()
        # Create a new Tweet instance
        new_tweet = Tweet(access_keys)
        # Get last twenty timeline posts
        timeline_posts = new_tweet.get_last_twenty_posts()
        # Get a random word
        message = get_random_word(data, timeline_posts)
        print(f'message to post = {message}')
        # Post new message to Twitter account
        new_tweet.create_new_post(message)
    except Exception as error:
        message = f'Error occurred during invocation of lambda function. Error = {error}'
        print(message)

def extractJson(fileName):
    """ Extract tweets from JSON file into a list of Tweets """
    tweetList = list()
    try:
        file = open(fileName, encoding='utf-8')
        json_str = file.read()
        data = json.loads(json_str)
        for tweet in data['results']:
            nTweet = Tweet()
            nTweet.id = tweet['id']
            nTweet.userId = tweet['from_user_id']
            nTweet.text = tweet['text']
            nTweet.user = tweet['from_user']
            nTweet.userName = tweet['from_user_name']
            nTweet.profileImgUrlHttp = tweet['profile_image_url']
            nTweet.source = tweet['source']
            nTweet.toUser = tweet['to_user']
            nTweet.date = tweet['created_at']
            for mention in tweet['entities']['user_mentions']:
                nTweet.userMentions.append(mention["id"])
            tweetList.append(nTweet)
        file.close()
    except ValueError:
        sys.exit("Error while parsing {0}: not a valid JSON file".format(fileName))
    return tweetList

def load_tweets_from_csv(fname='../data/annotated_tweets.csv', preprocess=True, serialize=True):
    # Load the data into memory
    print('Loading MTSA csv...')
    ids_to_content = defaultdict(lambda: [])
    with open(fname) as f:
        csv_reader = DictReader(f)
        for i, row in enumerate(csv_reader):
            ids_to_content[row[ID_KEY]].append(row)

    # construct the tweets and labels
    bar = ShadyBar('Labelling MTSA', max=len(ids_to_content))
    tweets = []
    for sample in ids_to_content.values():
        bar.next()
        csv_twt = sample[0]

        # skip the test questions used for crowdflower!
        if csv_twt[IS_GOLD_KEY] == 'true':
            continue

        # build up the tweet statistics of labels
        tweet_stats = {s: 0 for s in LABELS}
        for labelling in sample:
            if labelling[HAS_SENTIMENT_KEY] == 'no':
                tweet_stats['obj'] += 1
            for key in CSV_LABELS:
                if labelling[POS_NEG_COM_KEY] == key:
                    tweet_stats[key[0:3]] += 1

        # Skipping tweet that had < 5 annotations
        if sum(tweet_stats.values()) < 5:
            continue

        # extract the necessary data
        tweet = Tweet(csv_twt[TWEET_ID], csv_twt['text'], csv_twt['topic'])
        tweet.labelling = tweet_stats
        tweets.append(tweet)
    bar.finish()

    """
    The preprocessing pipeline is: (see preprocessing.py)
    (tokenize), (filter_tokens), (remove_mentions), (split_hashtags), (autocorrect), (lemmatize)
    """
    print('Removed {} tweets that had < 5 annotations.'.format(len(ids_to_content) - len(tweets)))
    print('We now have a total of {} tweets in the MTSA.'.format(len(tweets)))
    if preprocess:
        preprocess_tweets(tweets)

    # save data if desired
    if serialize:
        np.save('../data/processed_annotated_tweets.npy', np.array(tweets))
    return tweets

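# A minimal sketch, assuming the pipeline named in the docstring above
# (tokenize, filter_tokens, remove_mentions, split_hashtags, autocorrect, lemmatize).
# The real step functions live in preprocessing.py; the ones below are simplified
# stand-ins intended only to illustrate the order of operations.
import re

def _remove_mentions(tokens):
    # Drop @username tokens, one plausible reading of the "remove_mentions" step.
    return [t for t in tokens if not t.startswith('@')]

def preprocess_text_sketch(text):
    tokens = text.split()                              # (tokenize) - naive whitespace split
    tokens = [t for t in tokens if t]                  # (filter_tokens) - placeholder filter
    tokens = _remove_mentions(tokens)                  # (remove_mentions)
    tokens = [re.sub(r'^#', '', t) for t in tokens]    # (split_hashtags) - simplified: strip '#'
    return [t.lower() for t in tokens]                 # stand-in for autocorrect/lemmatize
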
def json_to_tweets(json_input):
    tweet = Tweet()
    with open(json_input, 'r') as jsonFile:
        data = json.load(jsonFile)
        for typeID, tweetID in data['id'].items():
            tweet.idTweet = int(tweetID)
        tweet.text = data['text']
    return tweet

def _save_tweet(retrieved_tweet, database):
    user = retrieved_tweet.user.name
    text = retrieved_tweet.full_text
    timestamp = retrieved_tweet.created_at
    tweet = Tweet()
    tweet.initialize_from_tweet(user, text, timestamp)
    return insert_tweet(database, tweet)

def load_data(filename):
    tweet_list = []
    with open(filename) as input_file:
        for line in input_file:
            tweet = Tweet()
            tweet.load(line)
            tweet_list.append(tweet)
    return tweet_list

def load_clean_tweets_from_file(self, input_file):
    self.tweets = []
    # with codecs.open(input_file, 'r', 'utf-8') as fin:
    with open(input_file, 'r') as fin:
        for line in fin:
            tweet = Tweet()
            tweet.load_clean(line)
            self.tweets.append(tweet)

def get_daily_update() -> Tweet:
    """Return Tweet containing daily update on Infection and Death numbers"""
    URL = "https://www.worldometers.info/coronavirus/country/us/"
    data = _scrape_worldometers_data(URL)
    tweet_str = _construct_tweet_str(data)
    tweet = Tweet()
    tweet.attach_text(tweet_str)
    return tweet

def searchEvent():
    results = api.GetSearch("Chicago", lang="en")
    for result in results:
        logger.logTweet(result)
        tweet = Tweet(result.text)
        tweet.readTweet()
        tweetprocessor.processTweet(tweet)
    countAndColor()

def parse_tweets(self):
    tweet_list = read_tweet(self.test_file)
    for each in tweet_list:
        tweet = Tweet(each['id'], each['user'], each['language'], each['content'])
        tweet.get_ngram_words(
            vocabulary_type=self.trained_data.vocabulary_type,
            ngram_type=self.trained_data.ngram_type,
            filter_type=self.trained_data.filter_type)
        self.tweets_to_predict.append(tweet)

def computePercentageOfTweetWithKeyword(self, keywords, k):
    # no tweet
    if len(self._tweet_cluster['tweets']) == 0:
        return 0
    # compute the percentage of tweets with at least k keywords
    occ = 0
    for tweet in self._tweet_cluster['tweets']:
        tweet = Tweet(tweet)
        if tweet.findKeywords(keywords) >= k:
            occ += 1
    return 1.0 * occ / len(self._tweet_cluster['tweets'])

def classify_tweets(self):
    tweet_list = read_tweet(self.training_file)
    for each in tweet_list:
        tweet = Tweet(each['id'], each['user'], each['language'], each['content'])
        tweet.get_ngram_words(vocabulary_type=self.vocabulary_type,
                              ngram_type=self.ngram_type,
                              filter_type=self.filter_type)
        if each['language'] in self.lang_classes:
            self.lang_classes[each['language']].add_new_tweet(tweet)

def randomize_tweet(animations, batch, msg):
    angle, x, y = randomize_starting_point()
    direction = [math.cos(angle), math.sin(angle)]
    b = Tweet(msg, animations, direction, gravity=settings.GRAVITY, batch=batch)
    b.x = x
    b.y = y
    return b

def load_raw_tweets_from_file(self, input_file):
    cnt = 0
    with codecs.open(input_file, 'r', 'utf-8') as fin:
        for line in fin:
            cnt += 1
            if cnt % 20000 == 0:
                print 'Lines:', cnt, ' Tweets:', len(self.tweets)
            try:
                tweet = Tweet()
                tweet.load_raw(line)
                self.tweets.append(tweet)
            except:
                continue

def searchEvent():
    results = api.GetSearch(SEARCH_TERM, lang="en")
    # Log tweets outside of timer
    for result in results:
        logger.logTweet(result)
    start_time = time.time()
    for result in results:
        tweet = Tweet(result.text)
        tweet.readTweet()
        tweetprocessor.processTweet(tweet)
    logger.logTiming("legacy", (time.time() - start_time), tweetprocessor.calcHighest())
    countAndColor()

def test_tweet_retrieval(self):
    twitter_searcher = TwitterSearcher()
    tweet_list = twitter_searcher.simple_search(1)
    retrieved_tweet = tweet_list[0]
    user = retrieved_tweet.user.name
    text = retrieved_tweet.full_text
    timestamp = retrieved_tweet.created_at
    tweet = Tweet()
    tweet.initialize_from_tweet(user, text, timestamp)
    self.assertTrue(tweet.user != None)
    self.assertTrue(tweet.tweet != None)

def main():
    ti = TweetInterface()
    ti.setDB('citybeat_production')
    ti.setCollection('tweets')
    tc = ti.getAllDocuments({'created_time': {'$gte': '1378711668'}})
    print tc.count()
    cnt = 0
    for tweet in tc:
        tweet = Tweet(tweet)
        text = tweet.getText().lower()
        if 'quinn' in text:
            print text
            cnt += 1
    print cnt

def select_user_tweet():
    if len(twitter_user_list.userList) == 0:
        print('User list is empty.')
    else:
        print('--- USER LIST ---')
        twitter_user_list.print_user_name()
        user_option_selected = int(input('Select a user (number): '))
        user_tweet_text = input('What\'s on your mind?: ')
        user_tweet = Tweet(
            user_tweet_text,
            twitter_user_list.userList[user_option_selected - 1].name)
        tweet_user_list.tweets.append(user_tweet)
        print(user_tweet.print_twitter_format())
    menuApp()

def hosts(self):
    """Returns top 5 candidates for host"""
    relevant = [t for t in self.tweets if 'hosting' in t.rawtext]
    return Tweet.common_names(relevant)[:5]

def dressed(self):
    """Most talked about for their dress"""
    relevant = [t for t in self.tweets if t.has_tok('dress')]
    return Tweet.common_names(relevant)[:5]

def funny(self):
    """Most talked about for being funny"""
    relevant = [t for t in self.tweets if t.has_tok('funny')]
    return Tweet.common_names(relevant)[:5]

def get(self):
    self.response.headers["Content-Type"] = "text/html"
    user_id = self.request.GET.get("user_id")
    spectator = True
    follow = "Follow"
    if user_id != None:
        myuser_key = ndb.Key("MyUser", user_id)
        myuser = myuser_key.get()
        tweets = Tweet.query(Tweet.user_id == myuser.key.id()).fetch(50)
        if user_id == str(Definitions().get_login_user().key.id()):
            spectator = False
        if user_id in Definitions().get_login_user().following:
            follow = "Unfollow"
        template_values = {
            "myuser": myuser,
            "follow": follow,
            "tweets": tweets,
            "spectator": spectator,
        }
        template = JINJA_ENVIRONMENT.get_template("editpage.html")
        self.response.write(template.render(template_values))

def readTweets(tweetIDs, folder, label):
    """
    Returns a dictionary containing tweets for the given IDs.
    Reads from a csv file (name = label.csv); each line is one tweet and the data is tab-separated.
    """
    tag_dict = loadTagDictionary()
    with codecs.open(folder + label + ".csv", 'r', encoding='utf-8') as tweetFile:
        tweets = tweetFile.read().split("\n")[:-1]
    tweet_dict = {}
    for tweet in tweets:
        tweet = tweet.strip().split("\t")
        tweetID = tweet[3].strip()
        if tweetID in tweetIDs:
            tweet_dict[tweetID] = tweet
    return {
        tweetID: Tweet(tag_dict[tweetID], rawTweet=tweet_dict[tweetID], label=label)
        for tweetID in tweet_dict.keys()
    }

def tweets(self):
    tweets = []
    for tweet_obj in self.db:
        tweets.append(Tweet(tweet_obj))
    return tweets

def display_create_tweet():
    # If the HTTP method is GET
    if request.method == 'GET':
        # Render the tweet-creation form
        return render_template('create_tweet.html')
    else:
        # Otherwise the HTTP method is POST, so we create a new tweet
        # Get the author's name from the request body
        authorName = request.form['author']
        # Get the content from the request body
        content = request.form['content']
        # Default image variable, empty for now
        image = None
        # Get the image from the request body
        f = request.files['image']
        # If an image was actually uploaded
        if f.filename != '':
            # Build the destination path for the image (where it will be saved)
            filepath = os.path.join(app.root_path, 'static', 'uploads', f.filename)
            # Save the image to that path
            f.save(filepath)
            # Build the image URL for display (from its filename)
            image = url_for('static', filename='uploads/' + f.filename)
        # Create a tweet with our constructor (defined in tweet.py)
        tweet = Tweet(authorName, content, image)
        # Insert the tweet at the first position of our list
        tweets.insert(0, tweet)
        # Redirect to the tweet list
        return redirect(url_for('display_tweets'))

def loadTweets(filename):
    """ Load tweets from a filename. Returns a list of Tweet objects. """
    tweets = open(filename, 'r').read().splitlines()
    print "Loading %d tweets from %s ..." % (len(tweets), filename)
    tweetObjects = []
    for tweet in tweets:
        try:
            js = json.loads(tweet)
            if (not ('place' in js)) or js['place'] == None:
                continue
            elif not ('full_name' in js['place']):
                continue
            elif (not ('geo' in js)) or js['geo'] == None:
                continue
            elif not ('coordinates' in js['geo']):
                continue
            coords = js['geo']['coordinates']
            place = js['place']
            tweetObject = Tweet(js['text'], place['full_name'], coords[0], coords[1],
                                place['country'], js['created_at'])
            tweetObjects.append(tweetObject)
        except ValueError:
            pass
    print "Loaded %d tweets" % len(tweetObjects)
    return tweetObjects

def index():
    REQUESTS.inc()
    with EXECPTIONS.count_exceptions():
        LAST.set(time.time())
        INPROGRESS.inc()
        start = time.time()
        if request.method == 'POST':
            try:
                query = request.form['query']
                query_vec = vectorizer.transform([query])  # (n_docs, x), (n_docs, n_feats)
                results = cosine_similarity(X, query_vec).reshape((-1,))  # cosine sim with each doc
                tweets = []
                for i in results.argsort()[-20:][::-1]:
                    tweets.append(Tweet(df.iloc[i, 0], df.iloc[i, 2], df.iloc[i, 3]))
                INPROGRESS.dec()
                lat = time.time()
                LATENCY.observe(lat - start)
                return render_template('Home.html', query=query, tweets=tweets)
            except:
                raise Exception
        try:
            INPROGRESS.dec()
            lat = time.time()
            LATENCY.observe(lat - start)
            LATENCY_HIS.observe(lat - start)
            return render_template('Home.html')
        except:
            raise Exception

def __init__(self, filename):
    tweets = []
    positiveproportion = 0
    negativeproportion = 0
    neutralproportion = 0
    with open(filename) as f:
        reader = csv.reader(f)
        for row in reader:
            tweet = Tweet(row)
            tweets.append(tweet)
            if tweet.sentiment > 0:
                positiveproportion += 1
            elif tweet.sentiment < 0:
                negativeproportion += 1
    positiveproportion /= len(tweets)
    negativeproportion /= len(tweets)
    neutralproportion = 1 - positiveproportion - negativeproportion
    self.sentimentfractions = [positiveproportion, negativeproportion, neutralproportion]
    self.tweets = tweets
    self.positivesorted = sorted(tweets, key=lambda i: i.sentiment, reverse=True)
    self.negativesorted = sorted(tweets, key=lambda i: i.sentiment)

def tweets(self, limit=10):
    tweets = []
    for item in self.collection.find().sort('received_at', desc).limit(limit):
        tweet_obj = item
        tweets.append(Tweet(tweet_obj))
    return tweets

def get_tweets(self, count, lang, **kwargs):
    """
    Gets tweets from twitter and returns them in a list.
    By default returns a single tweet in English.
    To change language, pass the language code in the parameters.

    Args:
        count: Number of tweets to be returned.
        lang: Language flag; by default it is set to English (en),
            pass the language code to change language.
        **kwargs: Arbitrary keyword arguments.

    Returns:
        List of tweets, in json format.
    """
    tweets = []
    try:
        if "filters" in kwargs:
            stream = self._get_filter(kwargs["filters"], kwargs)
        else:
            stream = self._get_iterator()
        for tweet in stream:
            tweets.append(Tweet(json.dumps(tweet)))
            count -= 1
            if count <= 0:
                break
        return tweets
    except Exception as e:
        raise e

def load_tweets():
    '''Returns a list of Tweet objects loaded from the path specified in the first sys argument'''
    try:
        filepath = sys.argv[1]
    except:
        print("usage: datamining.py filepath")
        quit()
    return Tweet.fromFile(filepath)

def extractTweets(fileName):
    """ Extract tweets from JSON file into a list of Tweets """
    tweetList = list()
    try:
        file = open(fileName)
        json_str = file.read()
        data = json.loads(json_str)
        print("Parsing " + fileName + " ...")
        for tweet in data["results"]:
            nTweet = Tweet()
            nTweet.id = tweet["id"]
            nTweet.userId = tweet["from_user_id"]
            nTweet.text = tweet["text"]
            nTweet.user = tweet["from_user"]
            nTweet.userName = tweet["from_user_name"]
            nTweet.profileImgUrlHttp = tweet["profile_image_url"]
            nTweet.source = tweet["source"]
            nTweet.toUser = tweet["to_user"]
            nTweet.date = tweet["created_at"]
            if "urls" in tweet["entities"]:
                for urls in tweet["entities"]["urls"]:
                    urlStr = urls["url"]
                    expandedUrl = urls["expanded_url"]
                    try:
                        u = urllib.urlopen(expandedUrl)
                        expandedUrl = u.url
                        u = None
                    except IOError as ioe:
                        print("Error urllib.urlopen")
                        print("---> URL = {}".format(expandedUrl))
                        continue
                    nTweet.urls.append(expandedUrl)
            for mention in tweet["entities"]["user_mentions"]:
                nTweet.userMentions.append(
                    {"id": mention["id"],
                     "name": mention["name"],
                     "screenName": mention["screen_name"]})
            tweetList.append(nTweet)
        file.close()
    except ValueError:
        sys.exit("Error while parsing {0}".format(fileName) + " Not a valid JSON file")
    return tweetList

def twauth(request):
    if 'oauth_token' in request.GET and 'oauth_verifier' in request.GET:
        tw = Tweet(twitter_settings)
        reg = tw.register(oauth_token=request.session['oauth_token'],
                          oauth_secret=request.session['oauth_token_secret'],
                          pin=request.GET['oauth_verifier'])
        if not reg:
            raise Exception, reg
        if 'oauth_token_secret' in reg and 'oauth_token' in reg:
            pp = PostPlace()
            pp.userid = reg['user_id']
            pp.sn_type = SNType.objects.get(code='twitter')
            pp.access_token = reg['oauth_token']
            pp.access_token_secret = reg['oauth_token_secret']
            pp.user = request.user
            pp.save()
            return pp
    return False

def clean_tweets(self):
    self.num_unicode_tweets = 0
    same_tweets_after_clean_cnt = 0
    try:
        clean_fh = open(self.output_file, "w")
    except IOError:
        print "Cannot open", self.output_file
    try:
        tweet_fh = open(self.input_file)
    except IOError:
        print "Cannot open", self.input_file
    else:
        tweets = tweet_fh.readlines()
        for tweet_line in tweets:
            tweet_dec = json.loads(tweet_line)
            if "limit" in tweet_dec.keys():
                continue
            cur_tweet = Tweet(tweet_dec)
            clean_fh.write(cur_tweet.to_str() + "\n")
            # This portion of code is a basic self-checking mechanism
            if self.self_checking:
                cur_text = cur_tweet.get_text()
                if cur_tweet.remove_non_ascii(cur_text) != cur_text:
                    same_tweets_after_clean_cnt += 1
            if not cur_tweet.is_tweet_ascii():
                self.num_unicode_tweets += 1
        clean_fh.write("\n%d tweets contained unicode" % (self.num_unicode_tweets))
        tweet_fh.close()
    if self.self_checking and (same_tweets_after_clean_cnt != self.num_unicode_tweets):
        print "Self checking failure: " + str(same_tweets_after_clean_cnt) + str(self.num_unicode_tweets)
    if not clean_fh.closed:
        clean_fh.close()

def load_tweets(api, username_file, load_file):
    NUM_USERS = 150
    NUM_TWEETS_PER_USER = 1000
    TOP_TWEETS_PER_USER = 250
    tweets = []
    if not os.path.exists(load_file):
        sys.stdout.write("Downloading twitter data " +
                         "for {} users at {} tweets/user..."
                         .format(NUM_USERS, TOP_TWEETS_PER_USER))
        users = load_users(username_file, NUM_USERS)
        for username in users:
            user_tweets = pull_top_tweets_for_user(
                api,
                username,
                NUM_TWEETS_PER_USER,
                TOP_TWEETS_PER_USER
            )
            if user_tweets:
                sys.stdout.write('.')
                tweets.extend(user_tweets)
            else:
                print "\nFailed to obtain user data for username: {}".format(username)
        print "Saving tweet data in {}...".format(load_file)
        write_tweets(load_file, tweets)
    else:
        sys.stdout.write("Loading saved twitter data...")
        with open(load_file, 'r') as f:
            json_data = f.read()
        data = json.loads(json_data, encoding='latin1')
        i = 0
        for d in data:
            i += 1
            if i % 2000 == 0:
                sys.stdout.write('.')
                i = 0
            try:
                tweet = Tweet.from_dict(d)
                tweets.append(tweet)
            except TwitterException as e:
                e.log()
    print "\nDone.\n"
    return tweets

def find_presenter(self, tweets):
    """Find the presenter of an award"""
    relevant = [
        t for t in self.filter_tweets(tweets)
        if t.has_tok('pres')  # lancaster stems presenter to pres
    ]
    common = Tweet.common_names(relevant)
    filtered = [
        cand for cand in common
        if 'Best' not in cand and cand != 'Golden Globes'
    ]
    return filtered[:3]

def search(self, term, newsid):
    print "search('%s','%s') called." % (term, newsid)
    self.redis.incr("news:%s:crawled_tweets" % newsid)
    # 1 day
    for _ in range(1, 2):
        for page in xrange(1, 16):
            try:
                results = self.twitter.search(q=term, page=("%d" % page))
                for tweet_data in results["results"]:
                    tweet_data["news_id"] = newsid
                    tweet = Tweet(tweet_data)
                    tweet.save()
            except:
                print "Exception: search('%s','%s')" % (term, newsid)
                print "-" * 60
                traceback.print_exc(file=sys.stdout)
                continue
            time.sleep(120)

def ParseTweet():
    global idfdict, tfdict, tweetObjectdict
    # Loop over all lines
    f = file('mars_tweets_medium.json', "r")
    # f = file('test.json', "r")
    i = 0
    lines = f.readlines()
    # tweetObjectdict = {}
    for line in lines:
        try:
            tweet = simplejson.loads(unicode(line), encoding="UTF-8")
            # print tweet
            t1 = Tweet()
            t1.loadtweet(tweet, tfdict)
            tweetObjectdict[t1.tid] = t1
            i = i + 1
        except ValueError:
            pass
    global TotalTweets
    TotalTweets = i
    return tweetObjectdict

def addUserTweets(self, user):
    """Gets the tweets from a user via its timeline and creates a Tweet object for each one"""
    try:
        timeline = self.corpus.getUserTimeline(self.twitter, user.screen_name)
    except urllib.request.HTTPError:
        print('An error occurred, please restart the application.')
        return
    tweets = []
    for ttweet in timeline:
        tweet = Tweet()
        tweet.tid = ttweet['id']
        tweet.text = ttweet['text']
        tweet.createdAt = ttweet['created_at']
        tweet.retweeted = ttweet['retweeted']
        if ttweet['in_reply_to_user_id'] is not None:
            tweet.isReply = True
            tweet.replyTo = ttweet['in_reply_to_user_id']
        tweets.append(tweet)
    user.tweets = self.createDeltaTimes(tweets)

def get_tweets(self, limit=10, offset=0):
    from tweet import Tweet
    new_tweets = []
    cached_media = memcache.get(self.id) or {}
    cached_tweets_json = cached_media.get('tweets') or []
    if not cached_tweets_json or limit + offset > len(cached_tweets_json):
        tweets = Tweet.all().filter('media =', self)
        if len(cached_tweets_json) and offset < len(cached_tweets_json):
            latest_time = iso8601.parse_date(cached_tweets_json[0]['time']).replace(tzinfo=None)
            tweets = tweets.filter('time >', latest_time)
        tweets = tweets.order('-time').fetch(limit, offset=offset)
        if tweets and len(tweets):
            new_time = tweets[0].time
            inserted = False
            # Insert by timestamp
            for i in range(len(cached_tweets_json)):
                time = iso8601.parse_date(cached_tweets_json[i]['time']).replace(tzinfo=None)
                if new_time > time:
                    for t in tweets:
                        if t.id == cached_tweets_json[i]['id']:
                            break
                        new_tweets.append(t.to_json())
                        inserted = True
                    if inserted:
                        new_tweets += cached_tweets_json[i:]
                        break
                new_tweets.append(cached_tweets_json[i])
            if not cached_tweets_json or not inserted:
                new_tweets = cached_tweets_json + [t.to_json() for t in tweets]
            cached_tweets_json = new_tweets if len(new_tweets) else cached_tweets_json
            cached_media['tweets'] = cached_tweets_json
            memcache.set(self.id, cached_media)
    return cached_tweets_json[offset:offset + limit]

if message:
    res = vk.VKPost(destination, message, attachments)
    try:
        js = json.loads(res)
        if 'error' in js:
            raise Exception, 'error found in vk responce'
        else:
            print res
    except Exception:
        destination.enabled = False
        destination.save()
if destination.sn_type.code == 'twitter':
    message = ""
    tw = Tweet(twitter_settings)
    tw.login(destination.access_token, destination.access_token_secret)
    if item['title']:
        message = "%s" % item['title']
    if item['attachements']:
        for attach in item['attachements']:
            message += " %s" % attach['src']
    if message:
        if tw.post(message):
            print 'Twitter f****d'
if destination.sn_type.code == 'fb':
    fb_settings['redirect_uri'] = "%s%s" % (HTTP_HOST, reverse('my.views.syncfacebook', args=[sync.id]))
    fb = FB(fb_settings, code=destination.access_token)
    fb.login()
    message = ""

def on_status(self, status):
    print(status.text)
    tweet = Tweet(content=status.text)
    tweet.save()

def setUp(self):
    app.config['TESTING'] = True
    self.app = app.test_client()
    Tweet.create_table(fail_silently=True)

def update_mode():
    global MAX_COUNT
    global consumer_key
    global consumer_secret
    global access_token
    global access_token_secret

    if not os.path.exists("./tweets") or not os.path.exists("./urls"):
        sys.stderr.write("First must call init\n")
        exit(1)

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    records = codecs.open("tweets", "r", encoding="utf-8")
    tweets = []
    for line in records:
        tweet = Tweet.fill_from_line(line)
        if not tweet:
            sys.stderr.write("bad tweets file\n")
            print line
            exit(1)
        tweets.append(tweet)
    records.close()

    tiff = []
    tiff = api.user_timeline(screen_name="TiffanyAlvord", since_id=int(tweets[0].id))
    for t in tiff:
        urls = {}
        index = 0
        for x in t.entities["urls"]:
            html = urllib2.urlopen(x["expanded_url"])
            urls[index] = [x["url"], x["expanded_url"]]
            page = open("urls/" + str(t.id) + "-" + str(index) + ".html", "w")
            page.write(html.read())
            page.close()
            index += 1
        text = t.text
        text = text.replace('\n', '\\n')
        tweet = Tweet(t.id, t.user.name, t.user.id, text, t.created_at, urls)
        tweets.append(tweet)

    tweets = sorted(tweets, reverse=True, key=lambda Tweet: Tweet.id)
    old_tweets = tweets[MAX_COUNT:]
    for t in old_tweets:
        if len(t.urls) > 0:
            os.system("rm ./urls/" + str(t.id) + "*")
        tweets.remove(t)

    records = codecs.open("tweets", "w", encoding="utf-8")
    for t in tweets:
        t.write_to_file(records)
    records.close()

def tweet_counter(self):
    from tweet import Tweet
    return len(Tweet.objects(poster=self))

from tweet import Tweet

twitter_configuration = {
    'consumer_key': '9yHE3TdYv9cVbVi5tme6C76Gj',
    'consumer_secret': 'noaO55Ra6jxWHYX4HDAo24PRskms7L8ZWMerNEAEeNCdOlbmvI',
    'access_token': '383755446-jhC6SSZ0NoeEz9FSiiQ4SKB1SAZ3KtFUJ3Vzxpbl',
    'access_token_secret': 'yfYvmFgfTCCWiJKug9zM2Yde169XUx75Jyt9sxywTGxQj'
}

############################

tweet = Tweet(twitter_configuration)
tweet.send_tweet()

def new(request):
    if not request.user:
        return HttpResponseRedirect(reverse('registration.views.login'))
    source = None
    fields = None
    sn_types = SNType.objects.all()
    if 'src' in request.GET:
        source = request.GET['src']

    # rss feed
    if source == 'rss':
        error = 0
        if request.POST:
            response = str(feedtest(request))
            if response.find('Feed ok') == -1:
                error = 1
            else:
                pp = PostPlace()
                pp.sn_type = SNType.objects.get(code='rss')
                pp.url = request.POST['feedurl']
                pp.user = request.user
                pp.save()
                s = Sync()
                s.source = pp
                s.user = pp.user
                s.save()
                return HttpResponseRedirect(reverse('my.views.sync', args=[s.id]))
        fields = [
            {'name': 'feedurl', 'type': 'text', 'size': 50, 'label': 'Feed URL', 'sort': 0},
            # {'name': 'feederror', 'type': 'error', 'message': '!!!!!'},
            {'name': 'testfeed', 'type': 'button', 'size': 5, 'value': 'Test feed',
             'onclick': 'feedtest();', 'sort': 3},
            {'name': 'testfeedresult', 'type': 'alertdiv', 'sort': 4},
            {'name': 'feeduptime', 'type': 'select', 'values': [{'value': 30, 'name': '30 min'}],
             'label': 'Refresh time', 'sort': 5},
            {'name': 'feedsave', 'type': 'submit', 'value': 'Далее', 'sort': 6},
        ]
        if error:
            import operator
            fields.append({'name': 'feederror', 'type': 'error',
                           'message': "Не получается разобрать RSS канал. Может в URL ошиблись?",
                           'sort': 1})
            fields.sort(key=operator.itemgetter('sort'))

    # vkontakte
    if source == 'vk':
        pp = vkauth(request)
        if pp:
            s = Sync()
            s.source = pp
            s.user = pp.user
            s.save()
            return HttpResponseRedirect(reverse('my.views.sync', args=[s.id]))
        fields = [
            # {'name': 'message', 'type': 'message', 'message': vk_notice},
            {'name': 'vkauth', 'type': 'button', 'size': 5,
             'value': 'Авторизация через сайт Vkontakte',
             'onclick': 'vkontakteauth(\'%s%s?%s\');' % (HTTP_HOST, reverse('my.views.new'), 'src=vk')},
            # {'name': 'token', 'type': 'text', 'size': 50, 'label': 'Token'},
            # {'name': 'feedsave', 'type': 'submit', 'value': 'Далее'},
        ]

    # twitter
    if source == 'twitter':
        pp = twauth(request)
        if pp:
            s = Sync()
            s.source = pp
            s.user = pp.user
            s.save()
            return HttpResponseRedirect(reverse('my.views.sync', args=[s.id]))
        tw = Tweet(twitter_settings)
        register = tw.register(
            callbackurl="%s%s?src=twitter" % (HTTP_HOST, reverse('my.views.new')))
        request.session['oauth_token_secret'] = register['data']['oauth_token_secret']
        request.session['oauth_token'] = register['data']['oauth_token']
        fields = [
            {'name': 'twauth', 'type': 'button', 'size': 5,
             'value': 'Авторизоваться через Twitter',
             'onclick': 'twitterauth(\'%s?src=twitter\', \'%s\');' % (
                 reverse('my.views.new'), register['url'])},
        ]

    # facebook
    if source == 'fb':
        pp = fbauth(request)
        if pp:
            s = Sync()
            s.source = pp
            s.user = pp.user
            s.save()
            return HttpResponseRedirect(reverse('my.views.sync', args=[s.id]))
        fb_settings['redirect_uri'] = "%s%s?src=fb" % (HTTP_HOST, reverse('my.views.new'))
        fb = FB(fb_settings)
        url = fb.register()
        fields = [
            {'name': 'fbauth', 'type': 'button', 'size': 5,
             'value': 'Авторизоваться через Facebook',
             'onclick': 'facebookauth(\'%s\');' % url},
        ]

    # foursquare
    if source == 'fs':
        callbackurl = "%s%s%%3Fsrc%%3Dfs" % (HTTP_HOST, reverse('my.views.new'))
        pp = fsauth(request, callbackurl)
        if pp:
            s = Sync()
            s.source = pp
            s.user = pp.user
            s.save()
            return HttpResponseRedirect(reverse('my.views.sync', args=[s.id]))
        fields = [
            {'name': 'fsauth', 'type': 'button', 'size': 5,
             'value': 'Авторизация через сайт Foursquare',
             'onclick': 'foursquareauth(\'%s\');' % callbackurl},
        ]

    # livejournal
    if source == 'lj':
        if request.POST:
            pp = PostPlace()
            pp.sn_type = SNType.objects.get(code='lj')
            pp.user = request.user
            pp.login = request.POST['user']
            pp.password = md5.md5(request.POST['password']).hexdigest()
            pp.save()
            s = Sync()
            s.source = pp
            s.user = pp.user
            s.save()
            return HttpResponseRedirect(reverse('my.views.sync', args=[s.id]))
        fields = [
            {'name': 'message', 'type': 'message', 'message': lj_notice},
            {'name': 'user', 'type': 'text', 'size': 50, 'label': 'Lj user'},
            {'name': 'password', 'type': 'password', 'size': 50, 'label': 'Password'},
            {'name': 'feedsave', 'type': 'submit', 'value': 'Продолжить'},
        ]

    return {'sn_types': sn_types, 'source': source, 'fields': fields}

if len(sys.argv) < 5:
    print "Usage: main.py x y width height"
    sys.exit(1)

cam = Camera(1, {'width': 640, 'height': 480})
roi = {'x': sys.argv[1], 'y': sys.argv[2], 'w': sys.argv[3], 'h': sys.argv[4]}

twitter_configuration = {
    'consumer_key': '9yHE3TdYv9cVbVi5tme6C76Gj',
    'consumer_secret': 'noaO55Ra6jxWHYX4HDAo24PRskms7L8ZWMerNEAEeNCdOlbmvI',
    'access_token': '383755446-jhC6SSZ0NoeEz9FSiiQ4SKB1SAZ3KtFUJ3Vzxpbl',
    'access_token_secret': 'yfYvmFgfTCCWiJKug9zM2Yde169XUx75Jyt9sxywTGxQj'
}

tweet = Tweet(twitter_configuration)

# globals
n = 60
buffer = []
avg = 0
threshold = 3
current_diff = 0
img = None


def count_white_pixels():
    global img
    img = cam.getImage()
    img = img.crop(roi['x'], roi['y'], roi['w'], roi['h'])
    img_edge = img.edges(50, 200)
    img_mat = img_edge.getNumpy().flatten()

def sync(request, syncid):
    errors = []
    try:
        sync = Sync.objects.get(pk=syncid)
    except Sync.DoesNotExist:
        return HttpResponseNotFound()
    sn_types = SNType.objects.filter(read_only=False)

    if request.POST:
        sync.title = request.POST['synctitle']
        sync.save()
        for var in request.POST.keys():
            test = re.findall("sync_target_id_(\d+)", var)
            if test and test[0] and request.POST[var]:
                pp = PostPlace.objects.get(pk=test[0])
                pp.userid = request.POST[var]
                pp.save()
        for var in request.POST.keys():
            test = re.findall("userid_(\d+)", var)
            if test and test[0] and request.POST[var]:
                pp = PostPlace.objects.get(pk=test[0])
                pp.userid = request.POST[var]
                pp.save()

    if sync.source.sn_type.code == 'vk':
        vk = VK(vk_settings)
        groups = vk.VKGetGroups(sync.source.access_token)
        if 'error' in groups:
            raise Exception, groups['error']['error_msg']
        if 'response' in groups:
            groups = groups['response']
            sync.source.sources = []
            for i in range(1, len(groups)):
                group = groups[i]
                if group['is_admin']:
                    sync.source.sources.append({'id': group['gid'], 'name': group['name']})

    if sync.source.sn_type.code == 'fb':
        fb_settings['redirect_uri'] = "%s%s?src=fb" % (HTTP_HOST, reverse('my.views.new'))
        fb = FB(fb_settings, code=sync.source.access_token)
        sync.source.sources = []
        try:
            fb.login()
            groups = fb.getGroups()
            pages = fb.getPages()
        except Exception:
            pass
        if groups:
            for group in groups:
                if 'administrator' in group:
                    sync.source.sources.append({'id': int(group['id']), 'name': 'Группа - ' + group['name']})
        if pages:
            for page in pages:
                sync.source.sources.append({'id': int(page['id']), 'name': 'Страницы - ' + page['name']})
        # for group in groups:
        #     if 'administrator' in group:
        #         sync.source.sources.append({'id': int(group['id']), 'name': group['name']})

    destination = None
    destinations_vk = []
    destinations_fb = []
    fields = None
    if 'destination' in request.GET:
        destination = request.GET['destination']

    if destination == 'vk':
        if request.POST and 'token' in request.POST:
            pp = PostPlace()
            pp.sn_type = SNType.objects.get(code='vk')
            pp.user = request.user
            pp.access_token = request.POST['token']
            pp.save()
            sync.destination.add(pp)
            sync.save()
            return HttpResponseRedirect(reverse('my.views.sync', args=[sync.id]))
        fields = [
            {'name': 'vkauth', 'type': 'button', 'size': 5,
             'value': 'Авторизация через сайт Vkontakte',
             'onclick': 'vkontakteauth(\'%s%s\');' % (HTTP_HOST, reverse('my.views.syncvk', args=[sync.id]))},
        ]

    if destination == 'twitter':
        tw = Tweet(twitter_settings)
        register = tw.register(
            callbackurl="%s%s" % (HTTP_HOST, reverse('my.views.synctwitter', args=[sync.id])))
        request.session['oauth_token_secret'] = register['data']['oauth_token_secret']
        request.session['oauth_token'] = register['data']['oauth_token']
        fields = [
            {'name': 'twauth', 'type': 'button', 'size': 5,
             'value': 'Авторизоваться через Twitter',
             'onclick': 'twitterauth(\'%s\', \'%s\');' % (
                 reverse('my.views.synctwitter', args=[sync.id]), register['url'])},
        ]

    if destination == 'fb':
        fb_settings['redirect_uri'] = "%s%s" % (HTTP_HOST, reverse('my.views.syncfacebook', args=[sync.id]))
        fb = FB(fb_settings)
        url = fb.register()
        fields = [
            {'name': 'fbauth', 'type': 'button', 'size': 5,
             'value': 'Авторизоваться через Facebook',
             'onclick': 'facebookauth(\'%s\');' % url},
        ]

    # livejournal
    if destination == 'lj':
        if request.POST:
            pp = PostPlace()
            pp.sn_type = SNType.objects.get(code='lj')
            pp.user = request.user
            pp.login = request.POST['user']
            pp.password = md5.md5(request.POST['password']).hexdigest()
            pp.save()
            sync.destination.add(pp)
            sync.save()
            return HttpResponseRedirect(reverse('my.views.sync', args=[sync.id]))
        fields = [
            {'name': 'message', 'type': 'message', 'message': lj_notice},
            {'name': 'user', 'type': 'text', 'size': 50, 'label': 'Lj user'},
            {'name': 'password', 'type': 'password', 'size': 50, 'label': 'Password'},
            {'name': 'feedsave', 'type': 'submit', 'value': 'Продолжить'},
        ]

    if sync.destination.all():
        for dest in sync.destination.all():
            new_sn_types = []
            for sn_type in sn_types:
                if sn_type != dest.sn_type:
                    new_sn_types.append(sn_type)
            sn_types = new_sn_types
            if dest.sn_type.code == 'vk':
                req = "https://api.vkontakte.ru/method/groups.get?access_token=%s&extended=1" % dest.access_token
                resp = urllib2.urlopen(req)
                groups = json.loads(resp.read())
                if 'error' in groups:
                    errors.append({'dest_id': dest.id, 'message': groups['error']['error_msg']})
                    sync.destination.remove(dest)
                if 'response' in groups:
                    groups = groups['response']
                    for i in range(1, len(groups)):
                        group = groups[i]
                        if group['is_admin']:
                            destinations_vk.append({'id': group['gid'], 'name': group['name']})
            if dest.sn_type.code == 'fb':
                groups = None
                pages = None
                fb_settings['redirect_uri'] = "%s%s" % (HTTP_HOST, reverse('my.views.syncfacebook', args=[sync.id]))
                fb = FB(fb_settings, code=dest.access_token)
                try:
                    fb.login()
                    groups = fb.getGroups()
                    pages = fb.getPages()
                except Exception:
                    pass
                if groups:
                    for group in groups:
                        if 'administrator' in group:
                            destinations_fb.append({'id': int(group['id']), 'name': 'Группа - ' + group['name']})
                if pages:
                    for page in pages:
                        destinations_fb.append({'id': int(page['id']), 'name': 'Страница - ' + page['name']})

    return {'sync': sync, 'sn_types': sn_types, 'destination': destination, 'fields': fields,
            'destinations_vk': destinations_vk, 'destinations_fb': destinations_fb, 'errors': errors}