Пример #1
0
    def parse(self, response):
        """Extract the page's ``<title>`` element from ``response`` and print it.

        Args:
            response: Object exposing ``xpath()``; presumably a Scrapy
                response -- confirm against the spider's base class.

        Returns:
            None. The populated item is only printed, not handed on.
        """
        # Fresh item to hold the scraped field.
        item = MypjtItem()
        # Absolute XPath selecting the whole <title> element (not just its
        # text node); the extracted matches are stored on the item.
        item["title"] = response.xpath("/html/head/title").extract()
        # Call form of print: a single argument behaves the same on Python 2
        # and Python 3, whereas the statement form is a SyntaxError on 3.
        print(item["title"])
Пример #2
0
    def parse(self, response):
        """Print each extracted ``<title>`` match, encoded as GBK.

        Args:
            response: Object exposing ``xpath()``; presumably a Scrapy
                response -- confirm against the spider's base class.

        Returns:
            None. The populated item is only printed, not handed on.
        """
        item = MypjtItem()
        item["title"] = response.xpath("/html/head/title").extract()
        # item["title"] is a list, so walk it and print the entries one by one.
        for title in item["title"]:
            # Every entry is encoded to GBK before being printed. The call
            # form of print parses on both Python 2 and Python 3.
            # NOTE(review): on Python 3 this prints the bytes repr (b'...');
            # if the file targets Python 3 only, printing `title` directly is
            # probably what is wanted -- confirm the target interpreter.
            print(title.encode("gbk"))
Пример #3
0
 def parse(self, response):
     """Build an item holding the page's title text and keywords, then return it.

     The title text is also printed. ``response`` must expose ``xpath()``
     (presumably a Scrapy response -- confirm against the spider's base class).
     """
     scraped = MypjtItem()

     # Text node(s) of <title>.
     title_texts = response.xpath("/html/head/title/text()").extract()
     scraped["title"] = title_texts

     # content attribute of the <meta name="keywords"> tag(s).
     keyword_query = "//meta[@name='keywords']/@content"
     scraped["key"] = response.xpath(keyword_query).extract()

     print(scraped["title"])
     # The item has to be returned; otherwise none of the scraped data is
     # handed back to the caller.
     return scraped
Пример #4
0
 def parse(self, response):
     """Collect the page's title text and keywords into an item and return it.

     Both fields are printed before the item is returned. ``response`` must
     expose ``xpath()`` (presumably a Scrapy response -- confirm against the
     spider's base class).
     """
     title_xpath = "/html/head/title/text()"
     keywords_xpath = "//meta[@name='keywords']/@content"

     item = MypjtItem()
     # Pull the title text out of the page with an XPath expression.
     item["title"] = response.xpath(title_xpath).extract()
     item["key"] = response.xpath(keywords_xpath).extract()
     # On Python 3.x the values can be printed as-is even when they contain
     # Chinese text; no manual encoding step is needed.
     print(item["title"], item["key"])
     return item
Пример #5
0
 def __init__(self, *args, **kwargs):
     """Initialise paging state and the shared CNY result item.

     Args:
         *args: Forwarded unchanged to the parent initializer.
         **kwargs: Forwarded unchanged to the parent initializer, so that
             arguments supplied by the framework (e.g. Scrapy spider
             arguments, if the base class is a Scrapy spider -- confirm)
             no longer raise ``TypeError``. Calling with no arguments
             behaves exactly as before.
     """
     super(CnySpider, self).__init__(*args, **kwargs)
     self.timeList = []
     self.currentPageIndex = 0
     self.allPagesNum = None

     self.item = MypjtItem()
     self.item['data_list'] = []
     self.item['currency_name'] = 'CNY'

     # Fetch the latest update time; the value returned by
     # check_all_currency_tb is kept on the spider and mirrored on the item.
     self.datetime = check_all_currency_tb('CNY', "CNY_tb")
     self.item['new_update_date'] = self.datetime
Пример #6
0
 def __init__(self, *args, **kwargs):
     """Initialise crawl state and the shared EUR result item.

     Args:
         *args: Forwarded unchanged to the parent initializer.
         **kwargs: Forwarded unchanged to the parent initializer, so that
             arguments supplied by the framework (e.g. Scrapy spider
             arguments, if the base class is a Scrapy spider -- confirm)
             no longer raise ``TypeError``. Calling with no arguments
             behaves exactly as before.

     Raises:
         IndexError: If ``self.start_urls`` is empty, because its first
             entry is logged unconditionally.
     """
     super(EurSpider, self).__init__(*args, **kwargs)
     self.currency_name = 'EUR'
     self.currency_tb_name = self.currency_name + '_tb'
     self.page_data = []

     self.item = MypjtItem()
     self.item['currency_name'] = self.currency_name
     self.item['data_list'] = []
     # Labels: currency name, exchange rate, update date, plus 'default'.
     self.item['top_list'] = ['货币名称', '汇率', '更新日期', 'default']

     # Currency codes: yuan, yen, pound sterling, US dollar, rouble.
     self.exchange_currency_list = ['CNY', 'JPY', 'GBP', 'USD', 'RUB']
     self.index = 0

     logger.info(self.start_urls[0])
     # Return value is intentionally left unused here, as in the original.
     check_all_currency_tb(self.currency_name, self.currency_tb_name)
Пример #7
0
    def parse(self, response):
        """Extract the page's title text from ``response`` and print it.

        Args:
            response: Object exposing ``xpath()``; presumably a Scrapy
                response -- confirm against the spider's base class.

        Returns:
            None. The populated item is only printed, not handed on.
        """
        item = MypjtItem()
        # Pull the title text out of the page with an XPath expression.
        # xpath() on its own yields selector objects, so extract() is needed
        # to store the actual title text on the item.
        item["title"] = response.xpath("/html/head/title/text()").extract()
        # On Python 3.x the value can be printed as-is even when it contains
        # Chinese text.
        print(item["title"])
Пример #8
0
 def parse(self, response):
     """Yield one item carrying the page's title text and keywords.

     Args:
         response: Object exposing ``xpath()``; presumably a Scrapy
             response -- confirm against the spider's base class.

     Yields:
         A ``MypjtItem`` with ``urlname`` and ``key`` populated.
     """
     item = MypjtItem()
     # Both fields go through extract() so the item carries the extracted
     # values rather than selector objects; previously only 'key' did.
     item['urlname'] = response.xpath('/html/head/title/text()').extract()
     item['key'] = response.xpath('//meta[@name="keywords"]/@content').extract()
     yield item
Пример #9
0
 def parse(self, response):
     """Fill an item with the page's title text and keywords, print and return it.

     ``response`` must expose ``xpath()`` (presumably a Scrapy response --
     confirm against the spider's base class).
     """
     # (item field, XPath query) pairs, evaluated in this order.
     field_queries = (
         ("title", "/html/head/title/text()"),
         ("key", "//meta[@name='keywords']/@content"),
     )

     item = MypjtItem()
     for field, query in field_queries:
         item[field] = response.xpath(query).extract()

     print(item)
     return item
Пример #10
0
 def parse(self, response):
     """Yield one item carrying the page's title text and keywords.

     ``response`` must expose ``xpath()`` (presumably a Scrapy response --
     confirm against the spider's base class).
     """
     item = MypjtItem()

     # xpath() hands back selectors, not strings; the extract() step is the
     # important part that turns them into the values stored on the item.
     title_nodes = response.xpath("/html/head/title/text()")
     item["title"] = title_nodes.extract()

     keyword_nodes = response.xpath("//meta[@name='keywords']/@content")
     item["key"] = keyword_nodes.extract()

     # The item has to be handed back, otherwise the pipeline receives no data.
     yield item