def getLocation(phoneNumber):
    """Return the second field of the lookup service's reply for a number.

    The number is appended to ``serviceUrl`` as the ``m`` query parameter and
    the page is downloaded with a freshly created ``HtmlGraber``.  The reply
    is cut into fields at every ``<br/><br/>`` and at any literal ``||`` it
    already contains.

    Args:
        phoneNumber: Number to query; it is concatenated onto the URL as-is.

    Returns:
        The second field of the reply.

    Raises:
        IndexError: If the reply yields fewer than two fields.
    """
    grabber = HtmlGraber()
    page = grabber.doGrab(serviceUrl + "?m=" + phoneNumber)
    # The double line break is first normalised to "||", so both markers end
    # up acting as field separators.
    fields = page.replace("<br/><br/>", "||").split("||")
    return fields[1]
# ---- Example #2 ----
# 0
'''
Created on 2012-9-21

@author: sniperwang
'''
import re
from HtmlGraber import HtmlGraber
from BeautifulSoup import BeautifulSoup
from FileUtil import FileUtil;
import time;
import os;
import sys,getopt;

# NOTE(review): presumably the number of board pages to walk -- confirm
# against the code that reads it.
TOTALPAGES = 1

# Shared downloader used by the page-fetching functions in this module.
htmlGraber = HtmlGraber()
# Site root that relative links collected from a page are prefixed with.
bbsHome = "https://bbs.sjtu.edu.cn/"
# NOTE(review): looks like the root directory for saved output -- confirm.
savePathRoot = "E:\\ppp\\"

def getHomePage(homeurl):
    """Collect absolute URLs of the PPPerson thread links found on a page.

    The page is downloaded with the module-level ``htmlGraber`` and parsed
    with BeautifulSoup.

    Args:
        homeurl: URL of the page to download and scan.

    Returns:
        A list of strings, one per ``<a>`` tag whose ``href`` matches
        ``bbstcon,board,PPPerson,reid``, each prefixed with ``bbsHome``.
        The list is empty when no link matches.
    """
    threadLink = re.compile(r'bbstcon,board,PPPerson,reid.*')
    soup = BeautifulSoup(htmlGraber.doGrab(homeurl))
    anchors = soup.findAll("a", href=threadLink)
    # Read the href by name rather than by position: attrs[0] is the href
    # only when it happens to be the first attribute written on the tag.
    return [bbsHome + str(anchor["href"]) for anchor in anchors]

def getPrePage(url):
    patt=re.compile(r'bbstdoc,board,PPPerson,page.*');
    htmlContent=htmlGraber.doGrab(url);
    soup=BeautifulSoup(htmlContent);
# ---- Example #3 ----
# 0
'''
Created on 2012-11-7

@author: sniperwang
'''
import re
from BeautifulSoup import BeautifulSoup
from HtmlGraber import HtmlGraber
from FileUtil import FileUtil;

if __name__ == '__main__':
    pass

# Unused sample share URL, kept for reference:
# url="http://share.renren.com/share/249317678/14623723075?from=0101010302&ref=hotnewsfeed&sfet=104&fin=36&fid=20148636643&ff_id=249317678";

# Photo page URL prefix; it ends in "photoId=", so a photo id is presumably
# appended by the caller -- confirm.
PicUrlHead = "http://share.renren.com/share/249317678/14623723075/?photoId="
# Shared downloader used by the page-fetching functions in this module.
htmlGraber = HtmlGraber()
maxCount = 50
firstIndex = 249317678 - 50
fileUtil = FileUtil()
# NOTE(review): looks like the directory downloaded files go to -- confirm.
homeSavePath = "E:\\temp\\"

def grabImageUrl(picUrl):
    """Return the ``src`` of the first ``<img id="photo">`` tag on a page.

    The page at ``picUrl`` is downloaded with the module-level ``htmlGraber``
    and parsed with BeautifulSoup.

    NOTE(review): the code visible here has no explicit return for the case
    where no such tag is found, so callers presumably have to cope with
    ``None`` -- confirm.
    """
    # NOTE(review): patt is compiled but not used by the code visible here;
    # the filter that referenced it is commented out on the findAll line.
    patt=re.compile(r'http://fmn.rrimg.com/.*');
    htmlContent=htmlGraber.doGrab(picUrl);
   # print htmlContent;
    soup=BeautifulSoup(htmlContent);
    # Select by element id only; the src-pattern filter is disabled.
    imgurls=soup.findAll('img',id="photo");#re.compile(patt));
    #print str(imgurl[0].src);
#    print "\n".join([str(item) for item in imgurls]);
    if(len(imgurls)>0):
        return imgurls[0]["src"];