import re

import meizi_page_download
import errorReport


def oneOfSeries(urllist, path):
    # capture the page name from a series URL like .../1234.html
    searchname = r'.*/(.*?)\.html'
    current_path = ''
    for url in urllist:
        try:
            name = re.findall(searchname, url, re.S)
            # build a per-series directory under the base path
            current_path = path + '/' + name[0]
            meizi_page_download.picurl(url, current_path)
            errorReport.success(url)
        except IndexError:
            # URL did not match the pattern; log it and move on
            errorReport.errorIndex(url, searchname)
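# --- Hypothetical sketch, not part of the original source ---
# The errorReport module is used everywhere above but never shown. Assuming it
# simply appends messages to plain-text log files (the final print in the main
# script mentions txt reports), errorReport.py might look roughly like this:
def _log(logfile, message):
    # append one line per event to a log file
    with open(logfile, 'a') as f:
        f.write(message + '\n')

def success(item):
    _log('success.txt', item)

def errorIndex(url, pattern):
    _log('error.txt', 'no match for %s in %s' % (pattern, url))

def errorFileExist(filename):
    _log('error.txt', 'file already exists: ' + filename)

def errorLoadUrl(url):
    _log('error.txt', 'failed to load: ' + url)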
import os
import re

import errorReport


def save_pic(url, path):
    # capture the image file name from a URL ending in .jpg
    searchname = r'.*/(.*?\.jpg)'
    name = re.findall(searchname, url)
    filename = path + '/' + name[0]
    #print filename + ':start'
    while True:
        if os.path.exists(filename):
            #os.remove(filename)
            errorReport.errorFileExist(filename)
            print filename, ' exists, skip'
            return True
        else:
            # create an empty placeholder, then retry until the download succeeds
            os.mknod(filename)
            if download(url, filename):
                break
    print filename + ':over'
    errorReport.success(filename)
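# --- Hypothetical sketch, not part of the original source ---
# save_pic() relies on a download(url, filename) helper that is not shown here.
# Assuming it fetches the image bytes with urllib2 and writes them to filename,
# returning True on success and False so the caller's while-loop retries, it
# could look roughly like this:
import urllib2

def download(url, filename):
    try:
        conn = urllib2.urlopen(url, timeout=5)
        data = conn.read()
        with open(filename, 'wb') as f:
            f.write(data)
        return True
    except Exception:
        errorReport.errorLoadUrl(url)
        return False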
import re
import urllib2

import meizi_series_nextpage
import errorReport


def loadurl(url):
    # fetch a page with a 5 second timeout; return '' on any failure
    try:
        conn = urllib2.urlopen(url, data=None, timeout=5)
        html = conn.read()
        return html
    except Exception:
        errorReport.errorLoadUrl(url)
        return ''


def meizi(url, path):
    reTagContent = '<div.*?class="tags">.*?<span>(.*?)</span>'
    reTagUrl = '<a.*?href="(.*?)".*?>'
    print 'start open meiziwang'
    html = ''
    # keep retrying until the front page loads
    while True:
        html = loadurl(url)
        if html == '':
            print 'load', url, 'error'
            continue
        else:
            break
    # pull the tag block out of the page, then collect every tag link inside it
    tagContent = re.findall(reTagContent, html, re.S)
    taglists = re.findall(reTagUrl, tagContent[0], re.S)
    taglists = sorted(list(set(taglists)))
    print 'open meiziwang over'
    #print len(taglists)
    for url in taglists:
        meizi_series_nextpage.nextpage(url, path)
        #meizi_series_getpage.tag_series(url,path)
        #print url


meizi('http://www.meizitu.com', '/home/hus/Desktop/meizi')
errorReport.success('/home/hus/Desktop/meizi')
print 'All over, successes and failures have been reported in txt files'
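# --- Hypothetical sketch, not part of the original source ---
# meizi_series_nextpage.nextpage(url, path) is called for every tag URL but its
# body is not shown here. A plausible shape, based on oneOfSeries() above, is
# that it loads the tag page, collects the series links on it, and hands the
# list to oneOfSeries(). It assumes loadurl(), oneOfSeries(), and re are
# available in that module:
def nextpage(url, path):
    html = loadurl(url)
    if html == '':
        return
    # collect every series page linked from this tag page
    series = re.findall(r'<a.*?href="(.*?\.html)".*?>', html, re.S)
    oneOfSeries(sorted(list(set(series))), path)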