def test_friends():
    fp = "friends_results.txt"
    crawler = FriendsCollector(limit=1)
    crawler.authenticate(api_key_file_path)
    crawler.connect_output([FileWriter(fp, clear=True)])
    user_id, cursor, cnt = crawler.collect(
        [187908577, 34156194, 66003384, 19248625])
    crawler.close()
    os.remove(fp)
    assert cnt > 0
def test_lookup():
    fp = "lookup_results.txt"
    crawler = UserLookup()
    crawler.authenticate(api_key_file_path)
    crawler.connect_output([FileWriter(fp, clear=True)])
    query_idx, cnt = crawler.collect(
        screen_names=["ferencberes91", "Istvan_A_Seres"])
    crawler.close()
    os.remove(fp)
    assert cnt > 0
def test_people():
    fp = "people_results.txt"
    crawler = PeopleCrawler(limit=2)
    crawler.authenticate(api_key_file_path)
    crawler.connect_output([FileWriter(fp, clear=True)])
    search_params = {"q": "data scientist"}
    crawler.set_search_arguments(search_args=search_params)
    page, cnt = crawler.search()
    crawler.close()
    os.remove(fp)
    assert cnt > 0
def test_recursive():
    fp = "recursive_results.txt"
    crawler = RecursiveCrawler(limit=2)
    crawler.authenticate(api_key_file_path)
    crawler.connect_output([FileWriter(fp, clear=True)])
    search_params = {
        "q": "#latest OR #news",
        "result_type": "recent",
        "count": 5
    }
    crawler.set_search_arguments(search_args=search_params)
    success, max_id, latest_id, cnt = crawler.search(term_func=None)
    crawler.close()
    os.remove(fp)
    assert (success and cnt > 0)
def test_stream():
    fp = "stream_results.txt"
    crawler = StreamCrawler(sync_time=1, limit=10)
    crawler.authenticate(api_key_file_path)
    crawler.connect_output([FileWriter(fp, clear=True)])
    search_params = {
        "q": "#latest OR #news",
        "result_type": "recent",
        "count": 5
    }
    crawler.set_search_arguments(search_args=search_params)
    crawler.search(90, None)
    crawler.close()
    results = FileReader(fp).read()
    os.remove(fp)
    assert len(results) > 0
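# Note: the tests above rely on module-level imports and an api_key_file_path
# variable defined elsewhere in the test file. A minimal sketch of that preamble,
# assuming the crawler classes live in twittercrawler.crawlers (as in the examples
# below) and that the credentials file path is project-specific:
import os
from twittercrawler.crawlers import (
    FriendsCollector, UserLookup, PeopleCrawler, RecursiveCrawler, StreamCrawler)
from twittercrawler.data_io import FileWriter, FileReader
api_key_file_path = "api_key.json"  # assumed location of the Twitter API credentials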
from twittercrawler.crawlers import RecursiveCrawler
from twittercrawler.data_io import FileWriter, FileReader
from twittercrawler.search import get_time_termination, get_id_termination
import datetime, time

# initialize
file_path = "recursive_results.txt"
recursive = RecursiveCrawler()
recursive.authenticate("../api_key.json")
recursive.connect_output([FileWriter(file_path, clear=True)])

# query
search_params = {
    "q": "#bitcoin OR #ethereum OR blockchain",
    "result_type": "recent",
    "lang": "en",
    "count": 100
}
recursive.set_search_arguments(search_args=search_params)

# termination (collect tweets from the last 5 minutes)
now = datetime.datetime.now()
time_str = (now - datetime.timedelta(seconds=300)).strftime("%a %b %d %H:%M:%S +0000 %Y")
print(time_str)
time_terminator = get_time_termination(time_str)

# run search - FIRST STAGE
success, max_id, latest_id, cnt = recursive.search(term_func=time_terminator)
print("\nFirst stage report:")
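# The example above stops just before printing the first-stage report. A plausible
# continuation (a sketch under assumptions, not the package's verbatim example)
# prints the returned counters and runs a second stage with get_id_termination,
# which is imported above but otherwise unused; the assumed behaviour is that it
# terminates once the previously collected newest tweet id (latest_id) is reached:
print(success, max_id, latest_id, cnt)

# run search - SECOND STAGE (assumed usage of get_id_termination)
id_terminator = get_id_termination(latest_id)
success, max_id, latest_id, cnt = recursive.search(term_func=id_terminator)
print("\nSecond stage report:")
print(success, max_id, latest_id, cnt)

# close the crawler and load the collected tweets
recursive.close()
results = FileReader(file_path).read()
print(len(results))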
from twittercrawler.crawlers import PeopleCrawler
from twittercrawler.data_io import FileWriter, SocketWriter, FileReader

# prepare writers
keys = ["name", "location", "description"]
file_path = "people_results.txt"
fw = FileWriter(file_path, clear=True, include_mask=keys)
sw = SocketWriter(7000, include_mask=keys)
# execute this command in a bash console to continue: telnet localhost 7000

# initialize
people = PeopleCrawler(limit=5)
people.authenticate("../api_key.json")
people.connect_output([fw, sw])

# query
search_params = {
    "q": "data scientist AND phd student",
}
people.set_search_arguments(search_args=search_params)

# run search
page, cnt = people.search()
print(page, cnt)

# close
people.close()

# load results
results_df = FileReader(file_path).read()
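# The read() call above is assumed to return a pandas DataFrame (hence the name
# results_df); under that assumption, a quick sanity check on the masked columns
# ("name", "location", "description") could look like this:
print(len(results_df))
print(results_df.head())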