Python MyxmlItem示例，myxml.items.MyxmlItem Python示例

示例#1

0

显示文件

    def parse_node(self, response, selector):
        """Collect the e-mail text nodes into a ``MyxmlItem`` and echo them.

        ``response`` is accepted but not used here.  The returned item's
        ``link`` field holds whatever list ``.extract()`` produced for the
        ``/person/email/text()`` query.
        """
        item = MyxmlItem()
        # Run the query first, then materialise the matches as a list.
        email_matches = selector.xpath('/person/email/text()')
        item['link'] = email_matches.extract()
        # Echo the value that was just stored on the item.
        print(item['link'])
        return item

示例#2

0

显示文件

 def parse_node(self, response, selector):
     """Return a ``MyxmlItem`` whose ``link`` field lists the e-mail texts.

     Only ``selector`` is consulted; ``response`` is unused.  The stored
     list is printed before the item is returned.
     """
     result = MyxmlItem()
     email_texts = selector.xpath('/person/email/text()').extract()
     result['link'] = email_texts
     print(result['link'])
     return result

示例#3

0

显示文件

文件： myxmlpjt.py 项目： dasharea/scrapy-xml

    def parse_node(self, response, selector):
        """Fill a ``MyxmlItem`` with feed titles and links, printing each pair.

        ``response`` is unused.  Titles and links are extracted by two
        independent queries, so the two lists are not guaranteed to have
        the same length.
        """
        item = MyxmlItem()
        item['title'] = selector.xpath('/rss/channel/item/title/text()').extract()
        item['link'] = selector.xpath('/rss/channel/item/link/text()').extract()

        # Walk the titles by position; the link at the same position is
        # looked up by index, so a shorter link list raises IndexError.
        for pos, title in enumerate(item['title']):
            print(title)
            print(item['link'][pos])

        return item

示例#4

0

显示文件

 def parse_node(self, response, selector):
     """Store the e-mail texts under the item's ``title`` field and print them.

     ``response`` is unused.  Returns the populated ``MyxmlItem``.
     """
     item = MyxmlItem()
     email_matches = selector.xpath('/person/email/text()')
     item['title'] = email_matches.extract()
     # Heading line first, then the stored list itself.
     print("E-Mails are: ")
     print(item['title'])
     return item

示例#5

0

显示文件

文件： myxmlspider.py 项目： knowmefly/crawler_learn

 def parse_node(self, response, node):
     """Extract plant name, price and light data into a ``MyxmlItem``.

     ``response`` is unused.  For every extracted ``COMMON`` entry a
     numbered heading is printed, followed by the price and light value
     found at the same position.  Returns the populated item.
     """
     item = MyxmlItem()
     field_queries = (
         ('COMMON', '/CATALOG/PLANT/COMMON/text()'),
         ('PRICE', '/CATALOG/PLANT/PRICE/text()'),
         ('LIGHT', '/CATALOG/PLANT/LIGHT/text()'),
     )
     for field, query in field_queries:
         item[field] = node.xpath(query).extract()

     # Only the position of each COMMON entry is needed; its text is not
     # printed.  PRICE/LIGHT are indexed by that position.
     for pos, _name in enumerate(item['COMMON']):
         print("第" + str(pos + 1) + "个植物")
         print("价格是：" + item['PRICE'][pos])
         print("培养方式：" + item['LIGHT'][pos])
     return item

示例#6

0

显示文件

	def parse_node(self, response, node):
		"""Build a ``MyxmlItem`` with title, link and author lists, then print it.

		``response`` is unused.  Each field receives the list returned by
		``.extract()`` for its XPath query.  Returns the populated item.
		"""
		entry = MyxmlItem()
		# Query each tag's text nodes and store the result on the item.
		field_queries = (
			('title', '/rss/channel/item/title/text()'),
			('link', '/rss/channel/item/link/text()'),
			('author', '/rss/channel/item/author/text()'),
		)
		for field, query in field_queries:
			entry[field] = node.xpath(query).extract()
		# Print the whole item in one go.
		print(entry)
		return entry

示例#7

0

显示文件

 def parse_node(self, response,node):
     """Extract feed titles, links and authors and print them entry by entry.

     ``response`` is unused.  The three lists come from separate queries;
     link and author are looked up at each title's position.  Returns the
     populated ``MyxmlItem``.
     """
     item = MyxmlItem()
     item['title'] = node.xpath("/rss/channel/item/title/text()").extract()
     item['link'] = node.xpath("/rss/channel/item/link/text()").extract()
     item['author'] = node.xpath("/rss/channel/item/author/text()").extract()
     for pos, title in enumerate(item["title"]):
         print(title)
         print(item['link'][pos])
         print(item['author'][pos])
         # Separator between entries.
         print("------------------")
     return item

示例#8

0

显示文件

文件： myxmlspider.py 项目： dahu1/SpiderLearning

 def parse_node(self, response, node):
     """Extract titles, links and authors of all ``item`` elements and print them.

     ``response`` is unused.  Each field of the returned ``MyxmlItem`` holds
     the list produced by ``.extract()`` for its ``//item/...`` query.

     All output goes through single-argument ``print(...)`` calls: the
     Python 2 ``print x`` statement form is a SyntaxError on Python 3,
     while ``print(x)`` with one argument prints the same on both.
     """
     i = MyxmlItem()
     i['title'] = node.xpath('//item/title/text()').extract()
     i['link'] = node.xpath('//item/link/text()').extract()
     i['author'] = node.xpath('//item/author/text()').extract()
     # Number of titles found.
     print(len(i['title']))
     # NOTE: link/author are looked up at each title's position; a shorter
     # link or author list raises IndexError, as in the original code.
     for j, title in enumerate(i['title']):
         print(title)
         print(i['link'][j])
         print(i['author'][j])
     return i

示例#9

0

显示文件

文件： myxmlspider.py 项目： Harold1994/DeepinPythonWebClawler

 def parse_node(self, response, node):
     """Populate a ``MyxmlItem`` from the feed and print a labelled listing.

     ``response`` is unused.  For each extracted title, the numbered
     heading, title, link and author are printed on separate lines,
     followed by a separator.  Returns the populated item.
     """
     item = MyxmlItem()
     item['title'] = node.xpath("/rss/channel/item/title/text()").extract()
     item['link'] = node.xpath("/rss/channel/item/link/text()").extract()
     item['author'] = node.xpath("/rss/channel/item/author/text()").extract()
     for pos, title in enumerate(item['title']):
         print("No." + str(pos + 1) + " passage:")
         print("title:")
         print(title)
         print("link:")
         # Link and author are fetched by the title's position.
         print(item['link'][pos])
         print("author:")
         print(item['author'][pos])
         print('___________________________')
     return item

示例#10

0

显示文件

 def parse_node(self, response, node):
     """Extract article titles, links and authors; print each article in turn.

     ``response`` is unused.  Returns the ``MyxmlItem`` carrying the three
     extracted lists.
     """
     item = MyxmlItem()
     field_queries = (
         ('title', "/rss/channel/item/title/text()"),
         ('link', "/rss/channel/item/link/text()"),
         ('author', "/rss/channel/item/author/text()"),
     )
     for field, query in field_queries:
         item[field] = node.xpath(query).extract()

     # One labelled group of lines per title; link/author are taken from
     # the same position in their own lists.
     for pos, title in enumerate(item['title']):
         print("第"+str(pos+1)+"篇文章")
         print("标题是：")
         print(title)
         print("对应链接是:")
         print(item["link"][pos])
         print("对应作者是:")
         print(item["author"][pos])
         print("-----------------")
     return item

示例#11

0

显示文件

 def parse_node(self, response, node):
     """Store feed titles, links and authors on a ``MyxmlItem`` and list them.

     ``response`` is unused.  Returns the populated item after printing a
     labelled block for every extracted title.
     """
     item = MyxmlItem()
     # Extract the text nodes for each field.
     item['title'] = node.xpath('/rss/channel/item/title/text()').extract()
     item['link'] = node.xpath('/rss/channel/item/link/text()').extract()
     item['author'] = node.xpath('/rss/channel/item/author/text()').extract()
     for pos, title in enumerate(item['title']):
         heading = '第' + str(pos + 1) + '篇文章'
         print(heading)
         print('标题是:')
         print(title)
         print('对应的链接是:')
         print(item['link'][pos])
         print('对应的作者是:')
         print(item['author'][pos])
         print('-------------------------')
     return item

示例#12

0

显示文件

 def parse_node(self, response, node):
     """Extract titles, links and authors from ``response`` and print them.

     All three queries run against ``response``; the ``node`` argument is
     not used.  Each printed line concatenates a label with the extracted
     text.  Returns the populated ``MyxmlItem``.
     """
     item = MyxmlItem()
     item['title'] = response.xpath('/rss/channel/item/title/text()').extract()
     item['link'] = response.xpath('/rss/channel/item/link/text()').extract()
     author_matches = response.xpath('/rss/channel/item/author/text()')
     item['author'] = author_matches.extract()
     for pos, title in enumerate(item['title']):
         print("第" + str(pos + 1) + "篇文章")
         print("标题：" + title)
         print("链接：" + item['link'][pos])
         print("作者：" + item['author'][pos])
         # 80-character rule between articles.
         print("-" * 80)
     return item

示例#13

0

显示文件

    def parse_node(self, response, node):
        """Extract article data into a ``MyxmlItem`` and print it per article.

        ``response`` is unused.  Returns the item holding the extracted
        title, link and author lists.
        """
        item = MyxmlItem()
        # Use XPath expressions to pull out each field and store it on the item.
        field_queries = (
            ('title', "/rss/channel/item/title/text()"),
            ('link', "/rss/channel/item/link/text()"),
            ('author', "/rss/channel/item/author/text()"),
        )
        for field, query in field_queries:
            item[field] = node.xpath(query).extract()
        # Walk the extracted titles in order and print the matching
        # link/author found at the same position.
        for pos, title in enumerate(item['title']):
            print("第"+str(pos+1)+"篇文章")
            print("标题是：")
            print(title)
            print("对应链接是：")
            print(item['link'][pos])
            print("对应作者是：")
            print(item['author'][pos])
            print("----------------------")
        return item

示例#14

0

显示文件

文件： myxmlspider.py 项目： xiyouhujing/SpiderPro

    def parse_node(self, response, node):
        """Extract article titles, links and authors and print them per article.

        ``response`` is unused.  Each field of the returned ``MyxmlItem``
        holds the list produced by ``.extract()`` for its XPath query.

        All output goes through single-argument ``print(...)`` calls: the
        Python 2 ``print x`` statement form is a SyntaxError on Python 3,
        while ``print(x)`` with one argument prints the same on both.  The
        ``u`` string prefixes are kept so Python 2 behaviour is unchanged.
        """
        i = MyxmlItem()
        i['title'] = node.xpath('/rss/channel/item/title/text()').extract()
        i['link'] = node.xpath('/rss/channel/item/link/text()').extract()
        i['author'] = node.xpath('/rss/channel/item/author/text()').extract()

        # NOTE: link/author are looked up at each title's position; a
        # shorter link or author list raises IndexError, as in the
        # original code.
        for j, title in enumerate(i['title']):
            print(u"第" + str(j + 1) + u"篇文章")
            print(u"标题是")
            print(title)
            print(u"对应的链接是")
            print(i['link'][j])
            print(u"对应的作者是")
            print(i['author'][j])
            print("--------------------------")

        return i

示例#15

0

显示文件

文件： myxmlspider.py 项目： kangbb/python_webspider

 def parse_node(self, response, selector):
     """Fill a ``MyxmlItem`` with feed data and print one block per article.

     ``response`` is unused.  Printed lines concatenate a label with the
     extracted text.  Returns the populated item.
     """
     item = MyxmlItem()
     # Use XPath expressions to extract each field.
     item['title'] = selector.xpath('/rss/channel/item/title/text()').extract()
     item['link'] = selector.xpath('/rss/channel/item/link/text()').extract()
     author_matches = selector.xpath('/rss/channel/item/author/text()')
     item['author'] = author_matches.extract()
     # Print the stored data article by article; link/author are taken
     # from the title's position in their own lists.
     for pos, title in enumerate(item['title']):
         print('第' + str(pos + 1) + '篇文章')
         print('标题：' + title)
         print('对应链接：' + item['link'][pos])
         print('对应作者是：' + item['author'][pos])
         print('-------------------------')
     return item

示例#16

0

显示文件

    def parse_node(self, response, node):
        """Extract titles, links and authors and print a labelled listing.

        ``response`` is unused.  Returns the ``MyxmlItem`` holding the three
        extracted lists.
        """
        item = MyxmlItem()
        item['title'] = node.xpath('/rss/channel/item/title/text()').extract()
        item['link'] = node.xpath('/rss/channel/item/link/text()').extract()
        item['author'] = node.xpath('/rss/channel/item/author/text()').extract()
        # Link and author are looked up at each title's position.
        for pos, title in enumerate(item['title']):
            print(str(pos+1) + ' th novel:')
            print("title is: ")
            print(title)
            print('link is: ')
            print(item['link'][pos])
            print('author is: ')
            print(item['author'][pos])
            print('--------------------------------')

        return item

示例#17

0

显示文件

文件： myxmlspider.py 项目： whyismefly/pythoncrawl

    def parse_node(self, response, selector):
        """Populate a ``MyxmlItem`` from the feed and print each article.

        ``response`` is unused.  Returns the item carrying the extracted
        title, link and author lists.
        """
        entry = MyxmlItem()
        field_queries = (
            ('title', "/rss/channel/item/title/text()"),
            ('link', "/rss/channel/item/link/text()"),
            ('author', "/rss/channel/item/author/text()"),
        )
        for field, query in field_queries:
            entry[field] = selector.xpath(query).extract()

        # One labelled group per title; link/author come from the same
        # position in their own lists.
        for pos, title in enumerate(entry['title']):
            print("第"+str(pos+1)+"篇文章")
            print("标题是：")
            print(title)
            print("对应链接是：")
            print(entry['link'][pos])
            print("对应作者是：")
            print(entry['author'][pos])
            print("___________________________")

        return entry

示例#18

0

显示文件

文件： myxmlspider.py 项目： Tod0532/SCRAPY_XML_CSV_MUL

    def parse_node(self, response, selector):
        """Extract titles, links and authors and print them per author entry.

        ``response`` is unused.  The print loop is driven by the ``author``
        list; title and link are looked up at each author's position.
        Returns the populated ``MyxmlItem``.
        """
        # Original author's note: this method is called when a node matches
        # the provided tag name; extraction and processing can happen here.
        entry = MyxmlItem()
        title_matches = selector.xpath("/rss/channel/item/title/text()")
        entry['title'] = title_matches.extract()
        link_matches = selector.xpath("/rss/channel/item/link/text()")
        entry['link'] = link_matches.extract()
        author_matches = selector.xpath("/rss/channel/item/author/text()")
        entry['author'] = author_matches.extract()

        for pos, author in enumerate(entry['author']):
            print("第" + str(pos + 1) + "篇文章")
            print("标题是： ")
            print(entry['title'][pos])
            print("对应链接是：")
            print(entry['link'][pos])
            print("对应作者是： ")
            print(author)
        return entry

示例#19

0

显示文件

 def parse_node(self, response, selector):
     #方法在节点与所提供的标 签名相符合的时候会被调用，在该方法中，可以进行一些信息的提取 和处理的操作
     item = MyxmlItem()
     item['link'] = selector.xpath("/person/email/text()").extract()
     print(item['link'])