Exemplo n.º 1
0
def getXunLeiAccount():
    """Scrape candidate Xunlei account lines from 521xunlei.com.

    Fetches the portal page, looks inside the element with id
    ``portal_block_62_content`` for the first link whose title is accepted
    by ``utils.checkLink`` (result >= 0), downloads that post and scans the
    text of its first ``td.t_f`` cell for lines containing the keyword.

    Returns:
        list: keyword lines shorter than 90 (measured on the UTF-8 encoded
        text) after ``utils.removeChineseChar``, kept only when at least 10
        non-space characters remain. Empty when the portal block or the
        post body cannot be found.
    """
    data = []
    flag = "迅雷"  # keyword a line must contain to be considered

    html = getPageHTML('http://521xunlei.com/portal.php')
    soup = BeautifulSoup(html, 'html.parser')
    elements = soup.find(id="portal_block_62_content")
    if elements is None:
        print(u"未找到资源.")
        return data

    for link in elements.find_all('a'):
        if utils.checkLink(link.get("title")) < 0:
            continue

        href = link.get('href')
        if href is None:
            # An anchor without href cannot be followed; try the next one
            # instead of failing on the string concatenation below.
            continue

        pageURL = "http://521xunlei.com/" + href
        post = BeautifulSoup(getPageHTML(pageURL), 'html.parser')
        cells = post.find_all("td", class_="t_f")
        if not cells:
            # Post layout changed or the page is empty: report it the same
            # way as a missing portal block rather than raising IndexError.
            print(u"未找到资源.")
            break

        for text in cells[0].get_text().split("\r\n"):
            text = text.encode('utf-8')
            # NOTE(review): the original condition was `if text.find("\n"):`,
            # which is false only when the chunk starts with a newline.
            # That behaviour is kept, spelled out explicitly: chunks that
            # start with "\n" are scanned line by line, every other chunk
            # is cut down to its first line. Confirm whether this is the
            # intended rule.
            if not text.startswith("\n"):
                text = text.split("\n")[0]
            print(text)
            for line in text.split("\n"):
                if flag in line and len(line) < 90:
                    _data = utils.removeChineseChar(line)
                    if len(_data.replace(' ', '')) >= 10:
                        data.append(_data)

        # Only the first accepted link is processed.
        break
    return data
Exemplo n.º 2
0
def getXunLeiAccount():
    """Scrape candidate Xunlei account lines from xlfans.com.

    Loads the front page, follows the first link inside the first
    ``article.excerpt`` element and scans every ``<p>`` of the linked page
    for paragraphs that contain both keywords.

    Returns:
        list: lines of the matching paragraphs after
        ``utils.removeChineseChar``, kept only when at least 10 non-space
        characters remain. Empty when the front page has no
        ``article.excerpt`` element or that element contains no link.
    """
    data = []
    url = "http://xlfans.com"

    soup = BeautifulSoup(getPage(url), 'html.parser')
    articles = soup.find_all("article", class_="excerpt")
    if not articles:
        # Nothing listed (layout change or empty page): no accounts.
        return data
    links = articles[0].find_all("a")
    if not links:
        return data

    html = getPage(links[0].get('href'))
    soup = BeautifulSoup(html, 'html.parser')
    for paragraph in soup.find_all("p"):
        text = paragraph.get_text().encode('utf-8')
        if "迅雷" in text and "密" in text:
            for _data in utils.removeChineseChar(text).split(u'\n'):
                if len(_data.replace(' ', '')) >= 10:
                    data.append(_data)
    return data
Exemplo n.º 3
0
def getXunLeiAccount():
    """Collect candidate Xunlei account lines from a post on xlfans.com.

    The front page is fetched, the first ``<a>`` found inside the first
    ``article.excerpt`` element is followed, and each ``<p>`` of that page
    is kept for processing when its text contains both keywords.

    Returns:
        list: the newline-separated pieces of every matching paragraph
        after ``utils.removeChineseChar``, limited to pieces with at least
        10 non-space characters. Empty when no ``article.excerpt`` element
        or no link inside it exists.
    """
    data = []
    url = "http://xlfans.com"
    html = getPage(url)
    soup = BeautifulSoup(html, 'html.parser')

    # find() returns None instead of raising when nothing matches, so a
    # changed or empty front page yields an empty result, not IndexError.
    article = soup.find("article", class_="excerpt")
    if article is None:
        return data
    anchor = article.find("a")
    if anchor is None:
        return data

    html = getPage(anchor.get('href'))
    soup = BeautifulSoup(html, 'html.parser')
    tag_p = soup.find_all("p")
    for line in tag_p:
        text = line.get_text().encode('utf-8')
        if "迅雷" not in text or "密" not in text:
            continue
        dataList = utils.removeChineseChar(text).split(u'\n')
        for _data in dataList:
            if len(_data.replace(' ', '')) >= 10:
                data.append(_data)
    return data