Exemplo n.º 1
0
def getmaincontent(url, timeout=10):
    """Download a page and return the cleaned text of its main content block.

    The page is requested with the ``headers`` object defined outside this
    function, parsed with ``etree.HTML``, and the first
    ``<div class="show-content">`` element is flattened to plain text with
    the XPath ``string(.)`` function before being passed through
    ``utils.dealstring``.

    :param url: address of the page to download.
    :param timeout: value forwarded to ``requests.get`` as ``timeout``
        (seconds). Without it a server that stops responding would block
        this call indefinitely.
    :return: the ``utils.dealstring`` result for the element's text, or
        ``""`` when the page contains no matching element.
    :raises requests.exceptions.Timeout: when the server does not answer
        within ``timeout``.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    tree = etree.HTML(response.text)
    matches = tree.xpath("//div[@class='show-content']")
    if not matches:
        return ""
    # Only the first matching container is used; string(.) concatenates all
    # descendant text nodes of that element.
    text = matches[0].xpath("string(.)")
    return utils.dealstring(text)
Exemplo n.º 2
0
def save2db(mydict, scatalogid):
    """Store every entry of ``mydict['entries']`` through ``Mysql``.

    For each entry a record with the keys ``title``, ``preid``, ``href``,
    ``fullcontent`` and ``content`` is filled in and handed to
    ``Mysql().insert_data_to_pages``.

    :param mydict: mapping with an ``'entries'`` iterable; each entry must
        provide ``'title'`` (markup whose text becomes the title) and
        ``'slug'`` (appended to the jianshu article URL prefix).
    :param scatalogid: value stored unchanged under ``'preid'``.
    :return: None.
    """
    # A single OrderedDict is reused for all entries; every key is
    # overwritten on each pass.
    record = OrderedDict()
    for entry in mydict['entries']:
        title = utils.dealstring(etree.HTML(entry['title']).xpath("string(.)"))
        link = "https://www.jianshu.com/p/" + entry["slug"]

        record['title'] = title
        record['preid'] = scatalogid
        record['href'] = link
        # The article page is fetched twice: once for its HTML, once for
        # its plain text.
        record['fullcontent'] = getmaincontenthtml(link)
        record['content'] = getmaincontent(link)

        # A new Mysql object is constructed for every entry.
        Mysql().insert_data_to_pages(record)
Exemplo n.º 3
0
def getmaincontenthtml(url, timeout=10):
    """Download a page and return the HTML of its main content block(s).

    The page is requested with the ``headers`` object defined outside this
    function. Every ``<div class="show-content">`` element is serialized
    back to markup, cleaned with ``utils.dealstring``, and the pieces are
    concatenated in document order. The combined markup is finally passed
    through ``deal_img_jianshu``.

    :param url: address of the page to download.
    :param timeout: value forwarded to ``requests.get`` as ``timeout``
        (seconds). Without it a server that stops responding would block
        this call indefinitely.
    :return: the ``deal_img_jianshu`` result for the concatenated markup
        (called with ``""`` when no element matches).
    :raises requests.exceptions.Timeout: when the server does not answer
        within ``timeout``.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    tree = etree.HTML(response.text)

    fragments = []
    for node in tree.xpath("//div[@class='show-content']"):
        markup = etree.tostring(node, encoding="utf-8").decode("utf-8")
        # Turn the &lt; / &gt; entities in the serialized markup back into
        # literal angle brackets.
        markup = markup.replace("&lt;", "<").replace("&gt;", ">")
        fragments.append(utils.dealstring(markup))

    return deal_img_jianshu("".join(fragments))
Exemplo n.º 4
0
def getmaincontent(url, timeout=10):
    """Download a page and return the cleaned text of its main content block.

    The page is parsed with ``etree.HTML`` and the first
    ``<div id="content_views">`` element is flattened to plain text with
    the XPath ``string(.)`` function before being passed through
    ``utils.dealstring``.

    :param url: address of the page to download.
    :param timeout: value forwarded to ``requests.get`` as ``timeout``
        (seconds). Without it a server that stops responding would block
        this call indefinitely.
    :return: the ``utils.dealstring`` result for the element's text, or
        ``""`` when the page contains no matching element.
    :raises requests.exceptions.Timeout: when the server does not answer
        within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    tree = etree.HTML(response.text)
    matches = tree.xpath("//div[@id='content_views']")
    if not matches:
        return ""
    # Only the first matching container is used; string(.) concatenates all
    # descendant text nodes of that element.
    text = matches[0].xpath("string(.)")
    return utils.dealstring(text)
Exemplo n.º 5
0
def getmaincontenthtml(url, timeout=10):
    """Download a page and return the HTML of its main content block(s).

    Every ``<div id="content_views">`` element is serialized back to
    markup, cleaned with ``utils.dealstring``, and the pieces are
    concatenated in document order.

    :param url: address of the page to download.
    :param timeout: value forwarded to ``requests.get`` as ``timeout``
        (seconds). Without it a server that stops responding would block
        this call indefinitely.
    :return: the concatenated cleaned markup, or ``""`` when no element
        matches.
    :raises requests.exceptions.Timeout: when the server does not answer
        within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    tree = etree.HTML(response.text)

    fragments = []
    for node in tree.xpath("//div[@id='content_views']"):
        markup = etree.tostring(node, encoding="utf-8").decode("utf-8")
        # Turn the &lt; / &gt; entities in the serialized markup back into
        # literal angle brackets.
        markup = markup.replace("&lt;", "<").replace("&gt;", ">")
        fragments.append(utils.dealstring(markup))

    return "".join(fragments)