def scan(geosearchclass, q):
    """Continuously poll for new tweets and push them onto queue *q*.

    Runs an initial search, queues every result, then loops: every ~5
    seconds it re-searches (result_type forced to "recent") and queues
    only tweets not seen before.  The loop is controlled by the module
    global ``keep_scanning``; clearing it stops the scan within one
    second.

    :param geosearchclass: search object exposing ``search()`` and a
        writable ``result_type`` attribute.
    :param q: Queue-like object with ``put()`` (e.g. Queue.Queue),
        shared with a consumer thread.
    :returns: None.
    """
    global keep_scanning
    search_results = geosearchclass.search()
    old_ids = [sr.id for sr in search_results]
    for s in search_results:
        q.put(s)
    while keep_scanning:
        # Sleep in 1-second slices so a cleared keep_scanning flag
        # terminates the scan promptly instead of after a full 5 s wait.
        for _ in range(5):
            if keep_scanning:
                time.sleep(1)
            else:
                return
        geosearchclass.result_type = "recent"
        search_results = geosearchclass.search()
        # NOTE(review): the sibling updating_plot uses utils.new_tweets;
        # presumably this bare name is the same helper imported directly
        # -- confirm against the module's imports.
        new_search_results = new_tweets(search_results, old_ids)
        if new_search_results:
            for nsr in new_search_results:
                q.put(nsr)
            # BUG FIX: record the ids just queued.  Previously old_ids was
            # never updated, so every polling cycle re-classified the same
            # tweets as "new" and pushed duplicates onto the queue.
            old_ids.extend(nsr.id for nsr in new_search_results)
    return
def updating_plot(geosearchclass, number_of_words, grow=True):
    """Draw and live-update a horizontal frequency plot of top tweet words.

    Performs an initial search, plots the frequency of the
    ``number_of_words`` most common filtered words, then polls for new
    tweets every 5 seconds for up to 100 iterations, re-plotting counts
    as they change.

    :param geosearchclass: search object exposing ``search()`` and a
        writable ``result_type`` attribute.
    :param number_of_words: how many top words from the frequency
        distribution to track.
    :param grow: if True, words that newly enter the top-N are appended
        to the y-axis over time; if False the initial word set is fixed.
    """
    search_results = geosearchclass.search()
    filtered_words = utils.tokenize_and_filter(search_results)
    fdist = utils.get_freq_dist(filtered_words)
    # set up plot: words on the y-axis, their counts on the x-axis
    samples = [item for item, _ in fdist.most_common(number_of_words)]
    freqs = [fdist[sample] for sample in samples]
    plt.grid(True, color="silver")
    plt.plot(freqs, range(len(freqs)))
    plt.yticks(range(len(samples)), [s for s in samples])
    plt.ylabel("Samples")
    plt.xlabel("Counts")
    plt.title("Top Words Frequency Distribution")
    plt.ion()  # interactive mode so the figure updates without blocking
    plt.show()
    # set up loop: track ids already seen so only genuinely new tweets count
    old_ids = set([s.id for s in search_results])
    for i in xrange(100):
        plt.pause(5)  # also services the GUI event loop while sleeping
        # use mixed above, change to recent here
        geosearchclass.result_type = "recent"
        # perturbation study (kept for experimentation):
        # if i%2:  # for testing purposes
        #     # change location every odd time to nyc
        #     # geosearchclass.latitude =40.734073
        #     # geosearchclass.longitude =-73.990663
        #     # perturb latitude
        #     geosearchclass.latitude =geosearchclass.latitude + .001
        # else:
        #     # now back to sf
        #     # geosearchclass.latitude = 37.7821
        #     # geosearchclass.longitude = -122.4093
        #     geosearchclass.longitude =geosearchclass.longitude + .001
        search_results = geosearchclass.search()
        new_search_results = utils.new_tweets(search_results, old_ids)
        if new_search_results:
            filtered_words = utils.tokenize_and_filter(new_search_results)
            # merge the new words into the running distribution
            fdist = update_fdist(fdist, filtered_words)
            if grow:
                # words that entered the top-N but are not yet on the axis
                newsamples = [
                    item for item, _ in fdist.most_common(number_of_words)
                ]
                s1 = set(newsamples)
                s2 = set(samples)
                s1.difference_update(s2)
                if s1:
                    print "New words: " + str(list(s1))
                    newsamples = list(s1)
                    samples.extend(newsamples)
                    plt.yticks(range(len(samples)), [s for s in samples])
            # re-plot counts for the (possibly extended) word list
            freqs = [fdist[sample] for sample in samples]
            plt.plot(freqs, range(len(freqs)))
            if grow:
                plt.draw()
            print '%d new tweet(s)' % len(new_search_results)
            old_ids.update(set([s.id for s in new_search_results]))
        else:
            print "no updates"
# NOTE(review): this is an exact duplicate of the updating_plot defined
# earlier in the file; at import time this second definition silently
# shadows the first.  One of the two should be removed.
def updating_plot(geosearchclass, number_of_words, grow=True):
    """Draw and live-update a horizontal frequency plot of top tweet words.

    Performs an initial search, plots the frequency of the
    ``number_of_words`` most common filtered words, then polls for new
    tweets every 5 seconds for up to 100 iterations, re-plotting counts
    as they change.

    :param geosearchclass: search object exposing ``search()`` and a
        writable ``result_type`` attribute.
    :param number_of_words: how many top words from the frequency
        distribution to track.
    :param grow: if True, words that newly enter the top-N are appended
        to the y-axis over time; if False the initial word set is fixed.
    """
    search_results = geosearchclass.search()
    filtered_words = utils.tokenize_and_filter(search_results)
    fdist = utils.get_freq_dist(filtered_words)
    # set up plot: words on the y-axis, their counts on the x-axis
    samples = [item for item, _ in fdist.most_common(number_of_words)]
    freqs = [fdist[sample] for sample in samples]
    plt.grid(True, color="silver")
    plt.plot(freqs, range(len(freqs)))
    plt.yticks(range(len(samples)), [s for s in samples])
    plt.ylabel("Samples")
    plt.xlabel("Counts")
    plt.title("Top Words Frequency Distribution")
    plt.ion()  # interactive mode so the figure updates without blocking
    plt.show()
    # set up loop: track ids already seen so only genuinely new tweets count
    old_ids = set([s.id for s in search_results])
    for i in xrange(100):
        plt.pause(5)  # also services the GUI event loop while sleeping
        # use mixed above, change to recent here
        geosearchclass.result_type = "recent"
        # perturbation study (kept for experimentation):
        # if i%2:  # for testing purposes
        #     # change location every odd time to nyc
        #     # geosearchclass.latitude =40.734073
        #     # geosearchclass.longitude =-73.990663
        #     # perturb latitude
        #     geosearchclass.latitude =geosearchclass.latitude + .001
        # else:
        #     # now back to sf
        #     # geosearchclass.latitude = 37.7821
        #     # geosearchclass.longitude = -122.4093
        #     geosearchclass.longitude =geosearchclass.longitude + .001
        search_results = geosearchclass.search()
        new_search_results = utils.new_tweets(search_results, old_ids)
        if new_search_results:
            filtered_words = utils.tokenize_and_filter(new_search_results)
            # merge the new words into the running distribution
            fdist = update_fdist(fdist, filtered_words)
            if grow:
                # words that entered the top-N but are not yet on the axis
                newsamples = [
                    item for item, _ in fdist.most_common(number_of_words)
                ]
                s1 = set(newsamples)
                s2 = set(samples)
                s1.difference_update(s2)
                if s1:
                    print "New words: " + str(list(s1))
                    newsamples = list(s1)
                    samples.extend(newsamples)
                    plt.yticks(range(len(samples)), [s for s in samples])
            # re-plot counts for the (possibly extended) word list
            freqs = [fdist[sample] for sample in samples]
            plt.plot(freqs, range(len(freqs)))
            if grow:
                plt.draw()
            print '%d new tweet(s)' % len(new_search_results)
            old_ids.update(set([s.id for s in new_search_results]))
        else:
            print "no updates"