def startUrlList(self):
    """Collect thread links from the BlackHatWorld proxy-lists forum index.

    Downloads the forum listing page, selects every anchor matching
    ``.inner .threadtitle a`` and returns their ``href`` values, skipping
    the first two matches.

    Returns:
        list: ``href`` attribute values of the selected thread anchors,
        in page order.
    """
    downloader = DownLoader(
        "http://www.blackhatworld.com/blackhat-seo/f103-proxy-lists/"
    )
    page_source = downloader.highOrderUrllib2()
    document = pq(page_source)
    anchors = document.find(".inner .threadtitle a")

    # The first two anchors are deliberately dropped
    # (presumably pinned/sticky threads -- TODO confirm against the page).
    thread_urls = []
    for anchor in anchors[2:]:
        thread_urls.append(pq(anchor).attr("href"))
    return thread_urls
def brandList():
    """Scrape the brand names listed on xiu.com's brand page.

    Renders ``http://www.xiu.com/brand.html`` through the downloader's
    selenium fetch, reads the text of every ``li>dl>dd>a`` anchor, splits
    each text on ``'/'`` (one anchor may carry several names) and removes
    all space characters from every resulting name.

    Returns:
        list: brand-name strings with spaces removed, in page order.
        A real list is always returned; the previous ``map(...)`` call
        produced a one-shot lazy iterator when run on Python 3.
    """
    page_source = DownLoader('http://www.xiu.com/brand.html').selenium()
    document = pq(page_source)
    # NOTE(review): my_text() is not a stock PyQuery method; it is assumed
    # to yield one text string per matched anchor -- confirm against the
    # project's pyquery extension.
    anchor_texts = document.find('li>dl>dd>a').my_text()

    names = []
    for entry in anchor_texts:
        names.extend(entry.split('/'))

    # List comprehension instead of map(): same result on Python 2, and on
    # Python 3 it yields an indexable, re-iterable list.
    return [name.replace(' ', '') for name in names]