def download_tweets(search=None, profile=None, sleep=1):
    """Yield parsed tweets for a search term or a profile name.

    Fetches the first Twitter results page, then keeps requesting the
    "more" JSON endpoint until Twitter reports no further items
    (profile mode) or the pagination cursor stops advancing (search
    mode — the search endpoint never reports "no more items" itself).

    :param search: search query string (mutually exclusive with profile).
    :param profile: screen name to scrape (used when search is None).
    :param sleep: seconds to pause between paging requests.
    """
    assert search or profile
    term = urllib.quote_plus(search or profile)
    url = TWITTER_SEARCH_URL if search else TWITTER_PROFILE_URL
    url_more = TWITTER_SEARCH_MORE_URL if search else TWITTER_PROFILE_MORE_URL
    response = requests.get(url.format(term=term),
                            headers={'User-agent': USER_AGENT}).text
    # Pagination cursor embedded in the first HTML page.
    min_position = find_value(response, 'data-min-position')
    for tweet in parse_search_results(response.encode('utf8')):
        yield tweet
    has_more_items = True
    last_min_position = None  # detects a cursor that has stopped moving
    while has_more_items:
        response = requests.get(
            url_more.format(term=term, max_position=min_position),
            headers={'User-agent': USER_AGENT}).text
        response_dict = json.loads(response)
        min_position = response_dict['min_position']
        if profile:
            has_more_items = response_dict['has_more_items']
        else:
            # Search mode: stop once the cursor no longer advances,
            # otherwise this loop would never terminate.
            has_more_items = last_min_position != min_position
        for tweet in parse_search_results(
                response_dict['items_html'].encode('utf8')):
            yield tweet
        last_min_position = min_position
        time.sleep(sleep)
def download_tweets(search=None, profile=None, sleep=DEFAULT_SLEEP):
    """Yield parsed tweets for a search term or a profile name.

    Fetches the first Twitter results page, then keeps requesting the
    "more" JSON endpoint until Twitter reports no further items
    (profile mode) or the pagination cursor stops advancing (search
    mode). An unparseable JSON payload is dumped to a timestamped
    debug file before the decode error is re-raised.

    :param search: search query string (mutually exclusive with profile).
    :param profile: screen name to scrape (used when search is None).
    :param sleep: seconds to pause between paging requests.
    """
    assert search or profile
    term = search or profile
    url = TWITTER_SEARCH_URL if search else TWITTER_PROFILE_URL
    url_more = TWITTER_SEARCH_MORE_URL if search else TWITTER_PROFILE_MORE_URL
    # URL-encode the term at the point of use; raw spaces/punctuation
    # would otherwise corrupt the request URL.
    response = requests.get(url.format(term=urllib.quote_plus(term)),
                            headers={'User-agent': USER_AGENT}).text
    # Pagination cursor embedded in the first HTML page.
    min_position = find_value(response, 'data-min-position')
    for tweet in parse_search_results(response.encode('utf8')):
        yield tweet
    has_more_items = True
    last_min_position = None  # detects a cursor that has stopped moving
    while has_more_items:
        response = requests.get(
            url_more.format(term=urllib.quote_plus(term),
                            max_position=min_position),
            headers={'User-agent': USER_AGENT}).text
        try:
            response_dict = json.loads(response)
        except ValueError:
            # json.loads raises ValueError on bad payloads; dump the
            # raw response for post-mortem, then re-raise.
            import datetime
            timestamp = datetime.datetime.now().strftime('%Y-%m-%d.%H%M')
            with open('__debug.response_%s.txt' % timestamp, 'wb') as fh:
                print >>fh, repr(response)
            raise
        min_position = response_dict['min_position']
        if profile:
            has_more_items = response_dict['has_more_items']
        else:
            # Search mode: stop once the cursor no longer advances,
            # otherwise this loop would never terminate.
            has_more_items = last_min_position != min_position
        for tweet in parse_search_results(
                response_dict['items_html'].encode('utf8')):
            yield tweet
        last_min_position = min_position
        time.sleep(sleep)
def download_tweets(search=None, profile=None, sleep=DEFAULT_SLEEP):
    """Yield parsed tweets for a search term or a profile name.

    Fetches the first Twitter results page, then keeps requesting the
    "more" JSON endpoint until Twitter reports no further items
    (profile mode) or the pagination cursor stops advancing (search
    mode). An unparseable JSON payload is dumped to a timestamped
    debug file before the decode error is re-raised.

    :param search: search query string (mutually exclusive with profile).
    :param profile: screen name to scrape (used when search is None).
    :param sleep: seconds to pause between paging requests.
    """
    assert search or profile
    term = (search or profile)
    url = TWITTER_SEARCH_URL if search else TWITTER_PROFILE_URL
    url_more = TWITTER_SEARCH_MORE_URL if search else TWITTER_PROFILE_MORE_URL
    response = requests.get(
        url.format(term=urllib.quote_plus(term)),
        headers={'User-agent': USER_AGENT})
    response_text = response.text
    # Pagination cursor embedded in the first HTML page.
    min_position = find_value(response_text, 'data-min-position')
    for tweet in parse_search_results(response_text):
        yield tweet
    has_more_items = True
    last_min_position = None  # detects a cursor that has stopped moving
    while has_more_items:
        response = requests.get(url_more.format(
            term=urllib.quote_plus(term),
            max_position=min_position),
            headers={'User-agent': USER_AGENT}
        )
        response_text = response.text
        try:
            response_dict = json.loads(response_text)
        except Exception:
            # Dump the raw payload for post-mortem, then re-raise.
            import datetime
            timestamp = datetime.datetime.now().strftime('%Y-%m-%d.%H%M')
            with open('__debug.response_%s.txt' % timestamp, 'wb') as fh:
                print >>fh, repr(response_text)
            raise
        min_position = response_dict['min_position']
        if profile:
            has_more_items = response_dict['has_more_items']
        else:
            # Search mode: the endpoint never reports "no more items",
            # so stop once the cursor no longer advances. (The former
            # `if search: has_more_items = True` override made this
            # check dead code and the loop infinite — removed.)
            has_more_items = last_min_position != min_position
        for tweet in parse_search_results(response_dict['items_html']):
            yield tweet
        last_min_position = min_position
        time.sleep(sleep)
def download_tweets(search=None, profile=None, sleep=DEFAULT_SLEEP):
    """Yield parsed tweets for a search term or a profile name.

    Fetches the first Twitter results page, then keeps requesting the
    "more" JSON endpoint until Twitter reports no further items
    (profile mode) or the pagination cursor stops advancing (search
    mode). An unparseable JSON payload is dumped to a timestamped
    debug file before the decode error is re-raised.

    :param search: search query string (mutually exclusive with profile).
    :param profile: screen name to scrape (used when search is None).
    :param sleep: seconds to pause between paging requests.
    """
    assert search or profile
    term = (search or profile)
    url = TWITTER_SEARCH_URL if search else TWITTER_PROFILE_URL
    url_more = TWITTER_SEARCH_MORE_URL if search else TWITTER_PROFILE_MORE_URL
    response = requests.get(url.format(term=urllib.quote_plus(term)),
                            headers={'User-agent': USER_AGENT}).text
    # Pagination cursor embedded in the first HTML page.
    min_position = find_value(response, 'data-min-position')
    for tweet in parse_search_results(response.encode('utf8')):
        yield tweet
    has_more_items = True
    last_min_position = None  # detects a cursor that has stopped moving
    while has_more_items:
        response = requests.get(
            url_more.format(term=urllib.quote_plus(term),
                            max_position=min_position),
            headers={'User-agent': USER_AGENT}).text
        try:
            response_dict = json.loads(response)
        except ValueError:
            # json.loads raises ValueError on bad payloads; dump the
            # raw response for post-mortem, then re-raise. (Narrowed
            # from a bare except that also caught KeyboardInterrupt.)
            import datetime
            timestamp = datetime.datetime.now().strftime('%Y-%m-%d.%H%M')
            with open('__debug.response_%s.txt' % timestamp, 'wb') as fh:
                print >>fh, repr(response)
            raise
        min_position = response_dict['min_position']
        if profile:
            has_more_items = response_dict['has_more_items']
        else:
            # Search mode: the endpoint never reports "no more items",
            # so stop once the cursor no longer advances. (The former
            # `if search: has_more_items = True` override made this
            # check dead code and the loop infinite — removed.)
            has_more_items = last_min_position != min_position
        for tweet in parse_search_results(
                response_dict['items_html'].encode('utf8')):
            yield tweet
        last_min_position = min_position
        time.sleep(sleep)
def download_tweets(search=None, profile=None, sleep=1):
    """Yield parsed tweets for a search term or a profile name.

    Fetches the first Twitter results page, then keeps requesting the
    "more" JSON endpoint until Twitter reports no further items
    (profile mode) or the pagination cursor stops advancing (search
    mode — the search endpoint never reports "no more items" itself).

    :param search: search query string (mutually exclusive with profile).
    :param profile: screen name to scrape (used when search is None).
    :param sleep: seconds to pause between paging requests.
    """
    assert search or profile
    term = urllib.quote_plus(search or profile)
    url = TWITTER_SEARCH_URL if search else TWITTER_PROFILE_URL
    url_more = TWITTER_SEARCH_MORE_URL if search else TWITTER_PROFILE_MORE_URL
    response = requests.get(url.format(term=term), headers={
        'User-agent': USER_AGENT
    }).text
    # Pagination cursor embedded in the first HTML page.
    min_position = find_value(response, 'data-min-position')
    for tweet in parse_search_results(response.encode('utf8')):
        yield tweet
    has_more_items = True
    last_min_position = None  # detects a cursor that has stopped moving
    while has_more_items:
        response = requests.get(
            url_more.format(term=term, max_position=min_position),
            headers={
                'User-agent': USER_AGENT
            }).text
        response_dict = json.loads(response)
        min_position = response_dict['min_position']
        if profile:
            has_more_items = response_dict['has_more_items']
        else:
            # Search mode: stop once the cursor no longer advances,
            # otherwise this loop would never terminate.
            has_more_items = last_min_position != min_position
        for tweet in parse_search_results(
                response_dict['items_html'].encode('utf8')):
            yield tweet
        last_min_position = min_position
        time.sleep(sleep)