示例#1
0
def testChapterContent(url):
    """Fetch a chapter page, extract its ``div#content`` markup and insert it.

    Downloads ``url``, strips inline ``<script>`` elements, removes nested
    ``div`` elements that carry a ``class`` or ``id`` attribute, unwraps the
    remaining ``div`` tags and inserts the escaped result into
    ``sg_chapter_0`` together with hard-coded values for the other columns.
    The escaped content and the final SQL statement are printed before the
    statement is executed.

    :param url: address of the chapter page to download.
    """
    req = urllib2.Request(url)
    # Close the HTTP response explicitly so the connection is released even
    # when read() raises.
    response = urllib2.urlopen(req)
    try:
        html = response.read()
    finally:
        response.close()

    # Raw string keeps the regex escapes (\s) away from Python's own string
    # escape processing. Only script elements whose body contains no '<'
    # are matched.
    re_script = re.compile(r'<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.I)
    html = re_script.sub('', html)
    soup = BeautifulSoup.BeautifulSoup(html)
    # NOTE(review): find() presumably returns None when the page has no
    # div#content, in which case findAll() below raises AttributeError --
    # confirm whether callers rely on that.
    cont = soup.find('div', id="content")

    # Drop nested divs that carry a class or id attribute.
    for del_item in cont.findAll('div'):
        if del_item.has_key('class') or del_item.has_key('id'):
            del_item.extract()

    # Unwrap the container and any remaining attribute-less divs, keeping
    # their inner markup.
    cont = str(cont)
    cont = cont.replace('<div id="content">', '')
    cont = cont.replace('</div>', '')
    cont = cont.replace('<div>', '')
    cont = cont.strip()
    cont = Db.escape(cont)
    print(cont)

    # NOTE(review): the statement is assembled by string interpolation and
    # relies entirely on Db.escape for safety; prefer a parameterized query
    # if db.execute supports one -- TODO confirm.
    sql = 'insert into sg_chapter_0(bk_id,name,content,publish_time,ch_sort) values("%s","%s","%s","%s", "%s")'
    # Interpolate once so the printed statement is exactly what is executed.
    statement = sql % ('9', 'test', cont, '2015-09-13 23:25:32', '1')
    print(statement)
    db.execute(statement)
示例#2
0
    def chapter(self):
        """Build the chapter record from the parsed page.

        Returns a dict with ``name`` (taken from ``self.name``), ``content``
        (the markup of ``div#content`` with its wrapper tags and the
        ``readx()`` script removed, stripped and passed through
        ``Db.escape``) and ``publish_time`` (the current time formatted as
        ``YYYY-MM-DD HH:MM:SS``). When the page has no ``div#content`` the
        content is built from an empty string.
        """
        res = {}
        res['name'] = self.name

        node = self.soup.find('div', id="content")
        # A missing element comes back as None; str(None) would otherwise
        # store the literal text "None" as the chapter content.
        cont = '' if node is None else str(node)
        for fragment in ('<div id="content">', '</div>',
                         '<script>readx();</script>'):
            cont = cont.replace(fragment, '')
        cont = Db.escape(cont.strip())

        res['content'] = cont
        res['publish_time'] = time.strftime('%Y-%m-%d %H:%M:%S')
        return res
示例#3
0
	def chapter(self):
		"""Build the chapter record from the parsed page.

		Returns a dict with ``name`` (taken from ``self.name``), ``content``
		(the markup of ``div#content`` with its wrapper tags and the
		``readx()`` script removed, stripped and passed through
		``Db.escape``) and ``publish_time`` (the current time formatted as
		``YYYY-MM-DD HH:MM:SS``). When the page has no ``div#content`` the
		content is built from an empty string.
		"""
		res = {}
		res['name'] = self.name

		node = self.soup.find('div', id="content")
		# A missing element comes back as None; str(None) would otherwise
		# store the literal text "None" as the chapter content.
		cont = '' if node is None else str(node)
		fragments = (
			'<div id="content">',
			'</div>',
			'<script>readx();</script>',
		)
		for fragment in fragments:
			cont = cont.replace(fragment, '')
		cont = Db.escape(cont.strip())

		res['content'] = cont
		res['publish_time'] = time.strftime('%Y-%m-%d %H:%M:%S')
		return res
示例#4
0
    def chapter(self):
        """Assemble the chapter record from the parsed page.

        Nested ``div`` elements inside ``div#content`` that carry a ``class``
        or ``id`` attribute are removed; the remaining ``div`` tags are
        stripped from the markup, which is then trimmed and passed through
        ``Db.escape``.

        Returns a dict with the keys ``name``, ``content`` and
        ``publish_time`` (current time as ``YYYY-MM-DD HH:MM:SS``).
        """
        record = {'name': self.name}
        content_div = self.soup.find('div', id="content")

        # Detach every nested div that is tagged with a class or an id.
        for nested in content_div.findAll('div'):
            if not (nested.has_key('class') or nested.has_key('id')):
                continue
            nested.extract()

        # Strip the wrapper and any attribute-less div tags, keeping the
        # markup they enclosed.
        markup = str(content_div)
        for tag_text in ('<div id="content">', '</div>', '<div>'):
            markup = markup.replace(tag_text, '')

        record['content'] = Db.escape(markup.strip())
        record['publish_time'] = time.strftime('%Y-%m-%d %H:%M:%S')
        return record
示例#5
0
	def chapter(self):
		"""Assemble the chapter record from the parsed page.

		Nested ``div`` elements inside ``div#content`` that carry a ``class``
		or ``id`` attribute are removed; the remaining ``div`` tags are
		stripped from the markup, which is then trimmed and passed through
		``Db.escape``.

		Returns a dict with the keys ``name``, ``content`` and
		``publish_time`` (current time as ``YYYY-MM-DD HH:MM:SS``).
		"""
		record = {'name': self.name}
		content_div = self.soup.find('div', id="content")

		# Detach every nested div that is tagged with a class or an id.
		for nested in content_div.findAll('div'):
			if not (nested.has_key('class') or nested.has_key('id')):
				continue
			nested.extract()

		# Strip the wrapper and any attribute-less div tags, keeping the
		# markup they enclosed.
		markup = str(content_div)
		for tag_text in ('<div id="content">', '</div>', '<div>'):
			markup = markup.replace(tag_text, '')

		record['content'] = Db.escape(markup.strip())
		record['publish_time'] = time.strftime('%Y-%m-%d %H:%M:%S')
		return record