def run(self):
    """Crawl the listing at ``self.url`` and store one Url_Link per detail page.

    Steps:
      1. Print ``self.url`` and ask ``self.get_content`` for the detail-page
         hrefs matching the listing pattern.
      2. Fetch each detail page with ``self.get_src_content``.
      3. When the page has an "rmdown" anchor, print its text and the page
         title, then save both as a ``Url_Link`` (skipped when the anchor
         text is empty).

    Any exception raised while handling a single page is reported on stdout
    as ``Run Error <message>!`` and the crawl moves on to the next page.
    Returns None.
    """
    # Single-argument print(...) keeps this working as a Python 2 print
    # statement while also being valid Python 3 syntax.
    print(self.url)

    # Every pattern is loop-invariant, so compile each exactly once.
    # The listing pattern captures the href up to (not including) "html".
    listing_pattern = re.compile('<h3><a href="(.*?)html" target="_blank" id="">')
    download_pattern = re.compile('<a target="_blank" .*?rmdown.*?>(.*?)</a>')
    title_pattern = re.compile('<title>(.*?)</title>')
    page_url_prefix = 'http://t66y.com/'

    # First collect the individual page links from the listing ...
    for item in self.get_content(self.url, listing_pattern):
        # ... then visit each page; re-append the "html" the pattern dropped.
        page_url = page_url_prefix + item + 'html'
        try:
            page_source = self.get_src_content(page_url)
            download_links = download_pattern.findall(page_source)
            if download_links:
                # Only the first download link on the page is used.
                download_link = download_links[0]
                print(download_link)

                # A page without a <title> raises IndexError here, which is
                # reported by the handler below.
                title = title_pattern.findall(page_source)[0]
                # Assumes the page source is a GBK-encoded byte string
                # (Python 2 str) - TODO confirm against get_src_content.
                title = title.decode('gbk').encode('utf-8')
                print(title)

                if download_link != '':
                    # Keyword arguments, matching save_url() in this file.
                    Url_Link(name=title, link=download_link).save()
            # Pause between page fetches.
            time.sleep(1)
        except Exception as e:
            print('Run Error ' + str(e) + '!')
def delete():
    """Delete the Url_Link entries selected by ``name="delete"``."""
    doomed = Url_Link.objects(name="delete")
    doomed.delete()
def update():
    """Set ``link`` to "http://www.163.com" on the Url_Link entries named "hello"."""
    targets = Url_Link.objects(name="hello")
    targets.update(link="http://www.163.com")
def save_url():
    """Create a Url_Link named "hello" with link "http://www.baidu.com" and save it."""
    Url_Link(name="hello", link="http://www.baidu.com").save()