示例#1
0
 def parse(self, response):
     """Collect one item per entry of the ``result-set`` ordered list.

     Logs a marker message, then, for every ``<li>`` directly under an
     ``<ol id="result-set">``, builds a ``myspiderBotItem`` whose ``title``
     and ``link`` fields hold what is extracted from ``h2/a/text()`` and
     ``h2/a/@href`` relative to that entry.

     Returns the list of items, in the order the selector yields the entries.
     """
     self.log("\n\n\n We got data! \n\n\n")

     def build_item(entry):
         # Fields are filled title first, then link.
         record = myspiderBotItem()
         record['title'] = entry.select('h2/a/text()').extract()
         record['link'] = entry.select('h2/a/@href').extract()
         return record

     selector = HtmlXPathSelector(response)
     return [build_item(entry)
             for entry in selector.select("//ol[@id='result-set']/li")]
示例#2
0
 def parse_item(self, response):
     """Build a single item holding the titles and links found on the page.

     Logs the page URL, selects the anchor nodes at a fixed positional path
     in the document, and stores the extracted text and ``href`` values of
     those anchors on one ``myspiderBotItem``.

     Args:
         response: the page response; must provide ``url`` and ``xpath``.

     Returns:
         One ``myspiderBotItem`` whose ``title`` and ``link`` fields hold the
         extraction results for all matched anchors.
     """
     # Debug markers; the single-argument parenthesized form prints the same
     # text under Python 2 and stays valid syntax under Python 3.
     print("####################")
     self.log("\n\n\n We got data! \n\n\n")
     self.log('Hi, this is an item page! %s' % response.url)
     # NOTE(review): this path is purely positional (div[3], div[5], ...), so
     # it depends on the exact page layout -- confirm it against the target
     # site whenever the markup changes.
     anchors = response.xpath('//html/body/div[3]/div/div[2]/div[5]/div/div[1]/div[2]/section/div/div/h3/a')
     print("@@@@@@@@@@@@@@@@")
     item = myspiderBotItem()
     print("$$$$$$$$$$$$$$$$$$")
     # The sub-queries must be relative: a path starting with "/" is evaluated
     # from the document root instead of from the matched anchors, which
     # leaves both fields empty.
     item['title'] = anchors.xpath('text()').extract()
     item['link'] = anchors.xpath('@href').extract()
     return item