Пример #1
0
 def parse(self, response):
     """Yield one exhibition item per ``<li>`` of the exhibition list.

     Every item carries the fixed ``museumID`` 121, the link text as
     ``exhibitionTheme`` and the image ``src`` prefixed with the site root
     as ``exhibition_picture`` (empty string when the row has no image).
     """
     rows = response.xpath("/html//div/div[1]/div[2]/div[3]/ul/li")
     for row in rows:
         # A fresh item per row: re-yielding one shared, mutated instance
         # makes every yielded object alias the same data.
         item = exhibition75Item()
         item["museumID"] = 121
         item["exhibitionTheme"] = row.xpath("./span/a/text()").extract_first()
         src = row.xpath("./a/img/@src").extract_first()
         # extract_first() gives None when nothing matches; concatenating
         # that onto the prefix used to raise TypeError and abort the parse.
         item["exhibition_picture"] = ('http://www.bjqtm.com/' + src) if src else ''
         yield item
Пример #2
0
 def parse(self, response):
     """Yield one exhibition item per ``<a>`` card of the exhibition list.

     Every item carries the fixed ``museumID`` 115, the second paragraph of
     the card as ``exhibitionTheme``, the first paragraph as
     ``exhibitionIntroduction`` and an empty ``exhibition_picture``.
     """
     cards = response.xpath("/html//div[3]/div[2]/div[2]/a")
     for card in cards:
         # A fresh item per card: re-yielding one shared, mutated instance
         # makes every yielded object alias the same data.
         item = exhibition75Item()
         item["museumID"] = 115
         item["exhibitionTheme"] = card.xpath(
             "./div[2]/div[2]/p[2]/text()").extract_first()
         item["exhibitionIntroduction"] = card.xpath(
             "./div[2]/div[2]/p[1]/text()").extract_first()
         item["exhibition_picture"] = ''
         yield item
Пример #3
0
 def parse(self, response):
     """Yield one exhibition item per ``<li>`` of the exhibition list.

     Every item carries the fixed ``museumID`` 114, the span text as
     ``exhibitionTheme``, a single-space ``exhibitionIntroduction`` and the
     image ``src`` prefixed with the site root as ``exhibition_picture``
     (empty string when the row has no image).
     """
     rows = response.xpath("/html/body/div[3]/div[2]/div[2]/ul/li")
     for row in rows:
         # A fresh item per row: re-yielding one shared, mutated instance
         # makes every yielded object alias the same data.
         item = exhibition75Item()
         item["museumID"] = 114
         item["exhibitionTheme"] = row.xpath(
             "./a/span/text()").extract_first()
         item["exhibitionIntroduction"] = ' '
         src = row.xpath("./a/img/@src").extract_first()
         # extract_first() gives None when nothing matches; concatenating
         # that onto the prefix used to raise TypeError and abort the parse.
         item["exhibition_picture"] = ('http://www.sxhm.com/' + src) if src else ''
         yield item
Пример #4
0
 def parse(self, response):
     """Yield one exhibition item per ``<tr>`` found anywhere in the page.

     Every item carries the fixed ``museumID`` 125, the link text as
     ``exhibitionTheme``, the nested div text as ``exhibitionIntroduction``
     and the image ``src`` prefixed with the site root as
     ``exhibition_picture`` (empty string when the row has no image).
     """
     rows = response.xpath("//tr")
     for row in rows:
         # A fresh item per row: re-yielding one shared, mutated instance
         # makes every yielded object alias the same data.
         item = exhibition75Item()
         item["museumID"] = 125
         item["exhibitionTheme"] = row.xpath(
             ".//div[2]/div[1]/a/text()").extract_first()
         item["exhibitionIntroduction"] = row.xpath(
             ".//div//div[2]/text()").extract_first()
         src = row.xpath(".//a/img/@src").extract_first()
         # The prefix used to be '(http://tour.dha.ac.cn)' -- the stray
         # parentheses made every stored URL invalid -- and a missing image
         # was stringified, producing a URL ending in "None".
         item["exhibition_picture"] = ('http://tour.dha.ac.cn' + src) if src else ''
         yield item
Пример #5
0
 def parse(self, response):
     """Yield one exhibition item per ``<div>`` entry of the exhibition list.

     Every item carries the fixed ``museumID`` 105, an empty
     ``exhibitionIntroduction``, the ``<dd>`` text as ``exhibitionTheme``
     and the raw image ``src`` as ``exhibition_picture``.
     """
     entries = response.xpath(
         "/html/body/div[3]/div/div[2]/div[3]/div[2]/div[1]/div/div")
     for entry in entries:
         # A fresh item per entry: re-yielding one shared, mutated instance
         # makes every yielded object alias the same data.
         item = exhibition75Item()
         item["museumID"] = 105
         item["exhibitionIntroduction"] = ''
         item["exhibitionTheme"] = entry.xpath(
             "./dl/dd/text()").extract_first()
         item["exhibition_picture"] = entry.xpath(
             "./dl/a/dt/img/@src").extract_first()
         yield item
Пример #6
0
 def parse(self, response):
     """Yield one exhibition item per ``<li>`` of the nested-table list.

     Every item carries the fixed ``museumID`` 118, the link text as
     ``exhibitionTheme``, the span text as ``exhibitionIntroduction`` and an
     empty ``exhibition_picture``.
     """
     rows = response.xpath(
         "/html//table[4]//tr/td/table[2]//tr/td[3]/table[3]//tr[1]/td/div/ul/li"
     )
     for row in rows:
         # A fresh item per row: re-yielding one shared, mutated instance
         # makes every yielded object alias the same data.
         item = exhibition75Item()
         item["museumID"] = 118
         item["exhibitionTheme"] = row.xpath("./a/text()").extract_first()
         item["exhibitionIntroduction"] = row.xpath(
             "./span/text()").extract_first()
         item["exhibition_picture"] = ''
         yield item
Пример #7
0
 def parse(self, response):
     """Yield one exhibition item per list row that has a non-empty theme.

     Every item carries the fixed ``museumID`` 109. Theme and introduction
     are whitespace-normalised by the XPath and have non-breaking spaces
     (both the real character and the literal text ``\\xa0``) replaced by a
     plain space. ``exhibition_picture`` is the image ``src`` prefixed with
     the site root, or an empty string when the row has no image.
     """
     rows = response.xpath("/html//div/div/div/div/div[2]/div/div[1]/div[2]/div/div/div[2]/div/div[1]/ul/li")
     for row in rows:
         theme = row.xpath("normalize-space(./div[2]/ul/li/h1/strong/a/text())").extract_first()
         theme = str(theme).replace(u'\\xa0', u' ').replace(u'\xa0', u' ')
         if not theme:
             # Rows without a title are not exhibitions; skip them.
             continue

         # A fresh item per row, so no field leaks over from an earlier row.
         item = exhibition75Item()
         item["museumID"] = 109
         item["exhibitionTheme"] = theme

         intro = row.xpath("normalize-space(./div[2]/div/text())").extract_first()
         item["exhibitionIntroduction"] = str(intro).replace(u'\\xa0', u' ').replace(u'\xa0', u' ')

         src = row.xpath("./div[1]/div/a/img/@src").extract_first()
         # A missing image used to be stringified, giving a URL ending in "None".
         item["exhibition_picture"] = ('http://www.ynnmuseum.com' + src) if src else ''

         # The original populated the item but never yielded it, so this
         # callback produced no output at all.
         yield item
Пример #8
0
 def parse(self, response):
     """Yield one exhibition item per ``<dl>`` of the exhibition list.

     Every item carries the fixed ``museumID`` 126, the ``<dt>`` span text as
     ``exhibitionTime``, the ``<dt>`` link text as ``exhibitionTheme``, the
     first text node of the second ``<dd>`` (whitespace-normalised, with
     ideographic spaces U+3000 removed) as ``exhibitionIntroduction`` and the
     image ``src`` as ``exhibition_picture`` (empty string when absent).
     """
     entries = response.xpath("/html//div[2]/div/div[2]/div[2]/div[2]/dl")
     for entry in entries:
         # A fresh item per entry: re-yielding one shared, mutated instance
         # makes every yielded object alias the same data.
         item = exhibition75Item()
         item["museumID"] = 126
         item['exhibitionTime'] = entry.xpath(
             "./dt/span/text()").extract_first()
         item["exhibitionTheme"] = entry.xpath("./dt/a/text()").extract_first()
         intro = entry.xpath(
             "normalize-space(./dd[2]/text()[1])").extract_first()
         item["exhibitionIntroduction"] = str(intro).replace(u'\u3000', u'')
         src = entry.xpath("./dd[1]/a/img/@src").extract_first()
         # str(None) used to store the literal text "None" as the picture
         # when the entry had no image.
         item["exhibition_picture"] = src if src else ''
         yield item
Пример #9
0
    def parse(self, response):
        """Yield one exhibition item per ``<div>`` card of the exhibition list.

        Every item carries the fixed ``museumID`` 108, the whitespace-normalised
        text of the card's title block as ``exhibitionTheme``, an empty
        ``exhibitionIntroduction`` and the image ``src`` prefixed with the
        site root as ``exhibition_picture`` (empty string when the card has
        no image).
        """
        cards = response.xpath(
            "/html//div/div[3]/div[1]/div/div[3]/div[1]/div")
        for card in cards:
            # A fresh item per card: re-yielding one shared, mutated instance
            # makes every yielded object alias the same data.
            item = exhibition75Item()
            item["museumID"] = 108
            item["exhibitionTheme"] = card.xpath(
                "normalize-space(./div[2]/div/div[1])").extract_first()
            item["exhibitionIntroduction"] = ''
            src = card.xpath('./div[1]/a/img/@src').extract_first()
            # extract_first() gives None when nothing matches; concatenating
            # that onto the prefix used to raise TypeError and abort the parse.
            item["exhibition_picture"] = ('http://www.ynmuseum.org' + src) if src else ''

            yield item
Пример #10
0
    def parse(self, response):
        """Yield one exhibition item per ``<div>`` entry of the exhibition list.

        Every item carries the fixed ``museumID`` 107 and an empty
        ``exhibition_picture``. Theme and introduction are whitespace-normalised
        by the XPath and have non-breaking spaces (both the real character and
        the literal text ``\\xa0``) replaced by a plain space.
        """
        entries = response.xpath("/html//div[4]/div/div/div")
        for entry in entries:
            # A fresh item per entry: re-yielding one shared, mutated instance
            # makes every yielded object alias the same data.
            item = exhibition75Item()
            item["museumID"] = 107

            theme = entry.xpath(
                "normalize-space(./a/span/text())").extract_first()
            item["exhibitionTheme"] = str(theme).replace(
                u'\\xa0', u' ').replace(u'\xa0', u' ')

            intro = entry.xpath(
                "normalize-space(./div/div/div/div/div[1]/div[2]/div/text())"
            ).extract_first()
            item["exhibitionIntroduction"] = str(intro).replace(
                u'\\xa0', u' ').replace(u'\xa0', u' ')

            item["exhibition_picture"] = ''

            yield item