def _topsy_window_link(q, time_lower, time_upper, page_no):
    """Build the Topsy search URL for one result page of one time window."""
    return ("http://topsy.com/s/" + q + "/tweet?maxtime=" + str(time_upper) +
            "&mintime=" + str(time_lower) +
            "&offset=" + str((page_no - 1) * 10) +
            "&page=" + str(page_no))


def _fetch_soup(link):
    """Download ``link`` and return it parsed by BeautifulSoup."""
    response = urllib2.urlopen(link)
    try:
        html = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    return BeautifulSoup(html)


def _parse_page_count(soup):
    """Return the page count shown in the ``page-number`` span, or 1.

    The span text is expected to look like "...about <N>" or "... of <N>".
    When the span is missing, carries neither marker, or the text after the
    marker is not an integer, a single page is assumed.
    """
    label = soup.find('span', {"class": "page-number"})
    if label is None:
        return 1
    text = label.text
    for marker in ('about', 'of'):
        pos = text.find(marker)
        if pos != -1:
            try:
                # Skip the marker and the single space that follows it.
                return int(text[pos + len(marker) + 1:])
            except ValueError:
                return 1
    return 1


def _store_tweets(soup):
    """Write every English tweet found in ``soup`` to ``myfile`` and ``tweets``."""
    for body in soup.findAll('div', class_="twitter-post-big"):
        for tweet in body.findAll(
                'span',
                {"class": "twitter-post-text translatable language-en"}):
            text = tweet.text
            myfile.write(text.encode("utf-8") + '\n')
            print(text + ' \n')
            # NOTE(review): the cleansed result is discarded and the raw text
            # is what gets stored in `tweets`; confirm whether the cleansed
            # text should be stored instead.
            cl.cleanseTweet(text)
            tweets.append(text)


def query(q, minTime, maxTime, timeWindow):
    """Scrape Topsy tweet search results for ``q`` one time window at a time.

    The range starting at ``minTime`` is walked in steps of ``timeWindow``
    while the window start is below ``maxTime``; the last window is not
    clamped, so it may extend past ``maxTime``. For each window the first
    result page is fetched, the advertised page count is read from it, and
    every page from 1 up to and including that count is harvested. Each
    tweet's text is written UTF-8 encoded to the module-level ``myfile`` and
    appended to the module-level ``tweets`` list.

    Args:
        q: Search expression inserted verbatim into the URL path
            (presumably terms joined with "+" -- confirm with callers).
        minTime: Start of the first window, sent as ``mintime``
            (units are not visible here -- presumably epoch seconds).
        maxTime: Windows are generated while their start is below this value.
        timeWindow: Width of each window, in the same units as ``minTime``.

    Raises:
        ValueError: If at least one window would be processed and
            ``timeWindow`` is not positive (the walk could never finish).
    """
    print("helloo")
    print(minTime)
    print(maxTime)

    if minTime < maxTime and timeWindow <= 0:
        raise ValueError(
            "timeWindow must be positive, got %r" % (timeWindow,))

    window_start = minTime
    while window_start < maxTime:
        window_end = window_start + timeWindow

        # The first page doubles as the probe for the page count, so it is
        # downloaded only once.
        first_soup = _fetch_soup(
            _topsy_window_link(q, window_start, window_end, 1))
        total_pages = _parse_page_count(first_soup)

        # Inclusive upper bound: the last advertised page is harvested too.
        for page_no in range(1, total_pages + 1):
            link = _topsy_window_link(q, window_start, window_end, page_no)
            print(link)
            soup = first_soup if page_no == 1 else _fetch_soup(link)
            _store_tweets(soup)

        window_start = window_end
def query(term1, term2, page):
    """Scrape Topsy search results for ``term1+term2`` and store the tweets.

    Result pages 1 through ``page - 1`` are requested (``page`` is an
    exclusive upper bound). The text of every English tweet found is written
    UTF-8 encoded to the module-level ``myfile``, printed, and appended to
    the module-level ``tweets`` list.

    Args:
        term1: First search term, inserted verbatim into the URL path.
        term2: Second search term, joined to ``term1`` with "+".
        page: Exclusive upper bound on the page number; values of 1 or less
            fetch nothing.
    """
    for page_no in range(1, page):
        # NOTE(review): page 1 is requested with offset=10, not 0 -- confirm
        # this is the offset the site expects.
        url = ("http://topsy.com/s/" + term1 + "+" + term2 +
               "/tweet?allow_lang=en&window=m&om=b&offset=" +
               str(page_no * 10) + "&page=" + str(page_no))
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()
        soup = BeautifulSoup(html)

        for body in soup.findAll('div', class_="twitter-post-big"):
            for tweet in body.findAll(
                    'span',
                    {"class": "twitter-post-text translatable language-en"}):
                text = tweet.text
                myfile.write(text.encode("utf-8") + '\n')
                print(text + ' \n')
                # NOTE(review): the cleansed result is discarded and the raw
                # text is what gets stored in `tweets`; confirm whether the
                # cleansed text should be stored instead.
                cl.cleanseTweet(text)
                tweets.append(text)
def query(term1, term2, page):
    """Scrape Topsy search results for ``term1+term2`` and store cleansed tweets.

    Result pages 1 through ``page - 1`` are requested (``page`` is an
    exclusive upper bound). Each English tweet's text is UTF-8 encoded and
    written to the module-level ``myfile``; the encoded text is then passed
    through ``cl.cleanseTweet`` and the result is appended to the
    module-level ``tweets`` list and printed.

    Args:
        term1: First search term, inserted verbatim into the URL path.
        term2: Second search term, joined to ``term1`` with "+".
        page: Exclusive upper bound on the page number; values of 1 or less
            fetch nothing.
    """
    for page_no in range(1, page):
        # NOTE(review): page 1 is requested with offset=10, not 0 -- confirm
        # this is the offset the site expects.
        url = ("http://topsy.com/s/" + term1 + "+" + term2 +
               "/tweet?allow_lang=en&window=m&om=b&offset=" +
               str(page_no * 10) + "&page=" + str(page_no))
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()
        soup = BeautifulSoup(html)

        for body in soup.findAll('div', class_="twitter-post-big"):
            for tweet in body.findAll(
                    'span',
                    {"class": "twitter-post-text translatable language-en"}):
                encoded = tweet.text.encode("utf-8")
                myfile.write(encoded + '\n')
                cleansed = cl.cleanseTweet(encoded)
                tweets.append(cleansed)
                print("%s \n" % (cleansed,))