def gettwitter(query):
    """Scrape the Twitter web search page for ``query`` and return tweet texts.

    A search URL is built with ``TwitterSearchOrder`` (language and locale
    both set to ``'en'``), the page is downloaded with ``getHtml`` and every
    ``<p class="TweetTextSize">`` element is reduced to plain text.

    Args:
        query: Keyword (or phrase) passed as the single search keyword.

    Returns:
        A list of strings, one per matched element.  Markup tags are
        removed, every URL is replaced by the literal ``"url"`` and each
        entry ends with a newline.  An empty list is returned when the
        search URL cannot be built.
    """
    try:
        tso = TwitterSearchOrder()
        tso.set_language('en')
        tso.set_locale('en')
        tso.set_keywords([query])
        url = "https://twitter.com/search" + tso.create_search_url()
    except TwitterSearchException as e:
        # Without a URL there is nothing to download; previously execution
        # fell through and failed on the unbound ``url`` variable.
        print(e)
        return []
    # Parenthesised form prints the same on Python 2 and parses on Python 3.
    print(url)

    html = getHtml(url)
    soup = BeautifulSoup(html)
    tweet_nodes = soup.find_all("p", class_="TweetTextSize")

    # Compile both patterns once instead of on every loop iteration.
    tag_pattern = re.compile(r'<[^>]+>', re.S)
    # ``A-Z`` (not ``A-z``): the old range also matched ``[ \ ] ^ _`` and
    # the backtick.  The optional trailing whitespace character is part of
    # the match, so it is swallowed together with the URL.
    url_pattern = re.compile(r"([a-zA-Z]+://\S*\s{0,1})")

    twitters = []
    for node in tweet_nodes:
        text = tag_pattern.sub('', str(node))
        text = url_pattern.sub("url", text)
        twitters.append(text + "\n")
    return twitters
# Disabled snippet that dumped ``result`` rows to a text file (kept for reference).
# # f = open('/root/sample_data/p.txt','w')
# # for i in range(len(result)):
# #     t = result[i][1].encode('ascii', 'replace').replace("\n"," ")
# #     f.write(t+"\n")
# f.close()

from TwitterSearch import TwitterSearchOrder, TwitterSearchException
import urllib2

# Ad-hoc check: build an English-language search URL for a fixed keyword,
# print it, then download a page and print its body.
try:
    tso = TwitterSearchOrder()
    tso.set_language('en')
    tso.set_locale('en')
    tso.set_keywords(['airline mergers'])
    url = "https://twitter.com/search" + tso.create_search_url()
    # Parenthesised print behaves the same on Python 2 for a single argument.
    print(url)
    # NOTE(review): this fetches a fixed baidu.com page, not ``url`` -- it
    # looks like a plain connectivity check; confirm that is intended.
    response = urllib2.urlopen('http://www.baidu.com/')
    try:
        html = response.read()
    finally:
        # Release the HTTP connection even if reading fails.
        response.close()
    print(html)
except TwitterSearchException as e:
    print(e)

# Rebuild a search order from an existing query string with extra parameters.
# NOTE(review): ``querystr`` is not defined in this part of the file; it must
# be provided earlier in the module, otherwise this raises NameError -- confirm.
tso2 = TwitterSearchOrder()
tso2.set_search_url(querystr + '&result_type=mixed&include_entities=true')
tso2.set_locale('en')
print(tso2.create_search_url())