def getLocation(phoneNumber):
    """Return the second field of the service response for ``phoneNumber``.

    The page at ``serviceUrl + "?m=" + phoneNumber`` is downloaded through a
    fresh ``HtmlGraber``. Every ``<br/><br/>`` in the response is rewritten
    to ``||`` and the text is then cut on ``||``, so a literal ``||`` already
    present in the page acts as a separator too.

    Args:
        phoneNumber: Number to query. It is concatenated straight into the
            URL, so it has to be concatenable with a string.

    Returns:
        The field at index 1 of the split response (the location, going by
        the function name).

    Raises:
        IndexError: If the response contains no separator at all.
    """
    fetcher = HtmlGraber()
    page = fetcher.doGrab(serviceUrl + "?m=" + phoneNumber)
    # Normalise the double line break to the "||" marker, then split on it.
    fields = page.replace("<br/><br/>", "||").split("||")
    return fields[1]
'''
Created on 2012-9-21

@author: sniperwang
'''
import getopt
import os
import re
import sys
import time

from BeautifulSoup import BeautifulSoup

from FileUtil import FileUtil
from HtmlGraber import HtmlGraber

TOTALPAGES = 1
# Shared downloader used by the page helpers below.
htmlGraber = HtmlGraber()
# Prefix prepended to the hrefs returned by getHomePage.
bbsHome = "https://bbs.sjtu.edu.cn/"
savePathRoot = "E:\\ppp\\"


def getHomePage(homeurl):
    """Collect the PPPerson thread links found on a board page.

    Downloads ``homeurl`` with the shared ``htmlGraber``, parses the HTML and
    selects every ``<a>`` tag whose ``href`` matches
    ``bbstcon,board,PPPerson,reid``.

    Args:
        homeurl: URL of the page to scan.

    Returns:
        A list of strings: ``bbsHome`` followed by each matching ``href``,
        in the order ``findAll`` yields the tags.
    """
    thread_href = re.compile(r'bbstcon,board,PPPerson,reid.*')
    soup = BeautifulSoup(htmlGraber.doGrab(homeurl))
    anchors = soup.findAll("a", href=thread_href)
    # Read the href by name: taking the first attribute of the tag would pick
    # up the wrong value whenever another attribute precedes href.
    return [bbsHome + str(anchor["href"]) for anchor in anchors]


def getPrePage(url):
    # NOTE(review): only the opening statements of this function are visible
    # in the reviewed excerpt. They are reproduced unchanged; whatever
    # followed them in the original file must be kept after this point.
    patt = re.compile(r'bbstdoc,board,PPPerson,page.*')
    htmlContent = htmlGraber.doGrab(url)
    soup = BeautifulSoup(htmlContent)
'''
Created on 2012-11-7

@author: sniperwang
'''
import re

from BeautifulSoup import BeautifulSoup

from FileUtil import FileUtil
from HtmlGraber import HtmlGraber

if __name__ == '__main__':
    pass

# NOTE(review): the assignments below are kept at module level because
# grabImageUrl reads the global ``htmlGraber``; confirm they were not nested
# under the ``__main__`` guard in the original layout.
#url="http://share.renren.com/share/249317678/14623723075?from=0101010302&ref=hotnewsfeed&sfet=104&fin=36&fid=20148636643&ff_id=249317678";
PicUrlHead = "http://share.renren.com/share/249317678/14623723075/?photoId="
htmlGraber = HtmlGraber()
maxCount = 50
firstIndex = 249317678 - 50
fileUtil = FileUtil()
homeSavePath = "E:\\temp\\"


def grabImageUrl(picUrl):
    """Return the ``src`` of the first ``<img id="photo">`` on a page.

    Downloads ``picUrl`` with the shared ``htmlGraber`` and parses the HTML.

    Args:
        picUrl: URL of the page to download.

    Returns:
        The ``src`` attribute of the first matching ``<img>`` tag, or
        ``None`` when the page has no such tag.

    Raises:
        KeyError: If the matching tag carries no ``src`` attribute.
    """
    soup = BeautifulSoup(htmlGraber.doGrab(picUrl))
    photos = soup.findAll('img', id="photo")
    if not photos:
        # No photo element on this page; callers receive None.
        return None
    return photos[0]["src"]