示例#1
0
 def parse_item(self, response):
     """Build a ``MysqlpjtItem`` holding the page title and meta keywords.

     Each field is filled with whatever ``extract()`` returns for the
     corresponding XPath query against ``response``.
     """
     item = MysqlpjtItem()
     # (item field, XPath query) pairs, evaluated in order.
     field_queries = (
         ("name", "/html/head/title/text()"),
         ("keywd", "/html/head/meta[@name='keywords']/@content"),
     )
     for field, query in field_queries:
         item[field] = response.xpath(query).extract()
     return item
示例#2
0
 def parse_item(self, response):
     """Return a ``MysqlpjtItem`` with the page's title and keywords.

     ``title`` comes from the ``<title>`` text node and ``keywd`` from the
     ``content`` attribute of the ``keywords`` meta tag; both are stored as
     the raw result of ``extract()``.
     """
     item = MysqlpjtItem()

     title_nodes = response.xpath("/html/head/title/text()")
     item['title'] = title_nodes.extract()

     keyword_nodes = response.xpath(
         "/html/head/meta[@name='keywords']/@content"
     )
     item['keywd'] = keyword_nodes.extract()

     # NOTE: extraction of further fields is currently disabled:
     #   domain_id   <- //input[@id="sid"]/@value
     #   name        <- //div[@id="name"]
     #   description <- //div[@id="description"]
     return item
示例#3
0
 def parse_item(self, response):
     """Populate a ``MysqlpjtItem`` with the page title and meta keywords.

     Both fields receive the result of ``extract()`` on their XPath query.
     """
     title_xpath = "/html/head/title/text()"
     keywords_xpath = '/html/head/meta[@name="keywords"]/@content'

     item = MysqlpjtItem()
     item['name'] = response.xpath(title_xpath).extract()
     item['keywd'] = response.xpath(keywords_xpath).extract()
     return item
示例#4
0
文件: weiwei.py 项目: fan1230/spider
 def parse_item(self, response):
     """Build a ``MysqlpjtItem`` with the page title and keywords.

     ``name`` is taken from the ``<title>`` text node. ``keywd`` is taken
     from the ``keywords`` meta tag; when that query yields nothing, the
     ``data-wbkey`` attribute of ``<div id="keywords">`` is used instead.

     Args:
         response: The response object to query via ``response.xpath``.

     Returns:
         The populated ``MysqlpjtItem``; each field holds the result of
         ``extract()`` for its query.
     """
     item = MysqlpjtItem()
     item['name'] = response.xpath('/html/head/title/text()').extract()

     # Query the meta tag once and fall back only when it is empty.
     # Previously the fallback value was unconditionally overwritten by a
     # second meta-tag lookup, so it never took effect.
     keywords = response.xpath('//meta[@name="keywords"]/@content').extract()
     if not keywords:
         keywords = response.xpath(
             '//div[@id="keywords"]/@data-wbkey'
         ).extract()
     item['keywd'] = keywords

     # NOTE: extraction of further fields is currently disabled:
     #   domain_id   <- //input[@id="sid"]/@value
     #   name        <- //div[@id="name"]
     #   description <- //div[@id="description"]
     return item