def parse(self, response):
    """Yield one ``exhibition75Item`` per ``<li>`` node matched in *response*.

    Every item gets ``museumID`` 121, the link text as ``exhibitionTheme``
    and the image ``src`` prefixed with ``http://www.bjqtm.com/`` as
    ``exhibition_picture`` (an empty string when the node has no image).
    """
    rows = response.xpath("/html//div/div[1]/div[2]/div[3]/ul/li")
    for row in rows:
        # A fresh item per row: re-yielding one shared instance would make
        # every yielded object the same object, overwritten by later rows.
        item = exhibition75Item()
        item["museumID"] = 121
        item["exhibitionTheme"] = row.xpath("./span/a/text()").extract_first()
        src = row.xpath("./a/img/@src").extract_first()
        # extract_first() gives None when nothing matches; concatenating it
        # would raise TypeError and abort the remaining rows.
        item["exhibition_picture"] = (
            '' if src is None else 'http://www.bjqtm.com/' + src
        )
        yield item
def parse(self, response):
    """Yield one ``exhibition75Item`` per ``<a>`` node matched in *response*.

    Every item gets ``museumID`` 115, the text of the second ``<p>`` as
    ``exhibitionTheme``, the text of the first ``<p>`` as
    ``exhibitionIntroduction`` and an empty ``exhibition_picture``.
    """
    anchors = response.xpath("/html//div[3]/div[2]/div[2]/a")
    for anchor in anchors:
        # A fresh item per node: re-yielding one shared instance would make
        # every yielded object the same object, overwritten by later nodes.
        item = exhibition75Item()
        item["museumID"] = 115
        item["exhibitionTheme"] = anchor.xpath(
            "./div[2]/div[2]/p[2]/text()").extract_first()
        item["exhibitionIntroduction"] = anchor.xpath(
            "./div[2]/div[2]/p[1]/text()").extract_first()
        item["exhibition_picture"] = ''
        yield item
def parse(self, response):
    """Yield one ``exhibition75Item`` per ``<li>`` node matched in *response*.

    Every item gets ``museumID`` 114, the span text as ``exhibitionTheme``,
    a single-space ``exhibitionIntroduction`` and the image ``src`` prefixed
    with ``http://www.sxhm.com/`` as ``exhibition_picture`` (an empty string
    when the node has no image).
    """
    rows = response.xpath("/html/body/div[3]/div[2]/div[2]/ul/li")
    for row in rows:
        # A fresh item per row: re-yielding one shared instance would make
        # every yielded object the same object, overwritten by later rows.
        item = exhibition75Item()
        item["museumID"] = 114
        item["exhibitionTheme"] = row.xpath("./a/span/text()").extract_first()
        item["exhibitionIntroduction"] = ' '
        src = row.xpath("./a/img/@src").extract_first()
        # extract_first() gives None when nothing matches; concatenating it
        # would raise TypeError and abort the remaining rows.
        item["exhibition_picture"] = (
            '' if src is None else 'http://www.sxhm.com/' + src
        )
        yield item
def parse(self, response):
    """Yield one ``exhibition75Item`` per ``<tr>`` node found in *response*.

    Every item gets ``museumID`` 125, the link text as ``exhibitionTheme``,
    the nested ``<div>`` text as ``exhibitionIntroduction`` and the image
    ``src`` prefixed with ``http://tour.dha.ac.cn`` as
    ``exhibition_picture`` (an empty string when the row has no image).
    """
    rows = response.xpath("//tr")
    for row in rows:
        # A fresh item per row: re-yielding one shared instance would make
        # every yielded object the same object, overwritten by later rows.
        item = exhibition75Item()
        item["museumID"] = 125
        item["exhibitionTheme"] = row.xpath(
            ".//div[2]/div[1]/a/text()").extract_first()
        item["exhibitionIntroduction"] = row.xpath(
            ".//div//div[2]/text()").extract_first()
        src = row.xpath(".//a/img/@src").extract_first()
        # The prefix used to be '(http://tour.dha.ac.cn)' and a missing src
        # was stringified to "None"; both produced unusable URLs.
        item["exhibition_picture"] = (
            '' if src is None else 'http://tour.dha.ac.cn' + src
        )
        yield item
def parse(self, response):
    """Yield one ``exhibition75Item`` per ``<div>`` node matched in *response*.

    Every item gets ``museumID`` 105, an empty ``exhibitionIntroduction``,
    the ``<dd>`` text as ``exhibitionTheme`` and the raw image ``src`` as
    ``exhibition_picture`` (``None`` when the node has no image).
    """
    cards = response.xpath(
        "/html/body/div[3]/div/div[2]/div[3]/div[2]/div[1]/div/div")
    for card in cards:
        # A fresh item per node: re-yielding one shared instance would make
        # every yielded object the same object, overwritten by later nodes.
        item = exhibition75Item()
        item["museumID"] = 105
        item["exhibitionIntroduction"] = ''
        item["exhibitionTheme"] = card.xpath("./dl/dd/text()").extract_first()
        item["exhibition_picture"] = card.xpath(
            "./dl/a/dt/img/@src").extract_first()
        yield item
def parse(self, response):
    """Yield one ``exhibition75Item`` per ``<li>`` node matched in *response*.

    Every item gets ``museumID`` 118, the link text as ``exhibitionTheme``,
    the span text as ``exhibitionIntroduction`` and an empty
    ``exhibition_picture``.
    """
    rows = response.xpath(
        "/html//table[4]//tr/td/table[2]//tr/td[3]/table[3]//tr[1]/td/div/ul/li"
    )
    for row in rows:
        # A fresh item per row: re-yielding one shared instance would make
        # every yielded object the same object, overwritten by later rows.
        item = exhibition75Item()
        item["museumID"] = 118
        item["exhibitionTheme"] = row.xpath("./a/text()").extract_first()
        item["exhibitionIntroduction"] = row.xpath(
            "./span/text()").extract_first()
        item["exhibition_picture"] = ''
        yield item
def parse(self, response):
    """Yield one ``exhibition75Item`` per ``<li>`` node that has a theme.

    Every item gets ``museumID`` 109, a whitespace-normalized
    ``exhibitionTheme`` and ``exhibitionIntroduction`` (non-breaking spaces
    replaced by plain spaces) and the image ``src`` prefixed with
    ``http://www.ynnmuseum.com`` as ``exhibition_picture`` (an empty string
    when the node has no image). Nodes whose theme is empty are skipped.

    NOTE(review): the previous version built the item but never yielded it,
    so the callback produced nothing. The yield is placed inside the
    non-empty-theme check on the assumption that this is what the check was
    for; confirm against the intended output.
    """
    rows = response.xpath(
        "/html//div/div/div/div/div[2]/div/div[1]/div[2]/div/div/div[2]/div/div[1]/ul/li")
    for row in rows:
        # normalize-space() evaluates to a string, so default='' only guards
        # against an unexpected empty result.
        theme = row.xpath(
            "normalize-space(./div[2]/ul/li/h1/strong/a/text())"
        ).extract_first(default='')
        # Replace both the literal text "\xa0" and the real NBSP character.
        theme = theme.replace(u'\\xa0', u' ').replace(u'\xa0', u' ')
        if not theme:
            continue

        intro = row.xpath(
            "normalize-space(./div[2]/div/text())").extract_first(default='')
        intro = intro.replace(u'\\xa0', u' ').replace(u'\xa0', u' ')
        src = row.xpath("./div[1]/div/a/img/@src").extract_first()

        # A fresh item per row, so yielded items never share state.
        item = exhibition75Item()
        item["museumID"] = 109
        item["exhibitionTheme"] = theme
        item["exhibitionIntroduction"] = intro
        # A missing src used to be stringified, giving a URL ending in "None".
        item["exhibition_picture"] = (
            '' if src is None else 'http://www.ynnmuseum.com' + src
        )
        yield item
def parse(self, response):
    """Yield one ``exhibition75Item`` per ``<dl>`` node matched in *response*.

    Every item gets ``museumID`` 126, the ``<dt>`` span text as
    ``exhibitionTime``, the ``<dt>`` link text as ``exhibitionTheme``, the
    whitespace-normalized first text node of the second ``<dd>`` (with
    U+3000 ideographic spaces removed) as ``exhibitionIntroduction`` and the
    raw image ``src`` as ``exhibition_picture`` (an empty string when the
    node has no image).
    """
    entries = response.xpath("/html//div[2]/div/div[2]/div[2]/div[2]/dl")
    for entry in entries:
        # A fresh item per node: re-yielding one shared instance would make
        # every yielded object the same object, overwritten by later nodes.
        item = exhibition75Item()
        item["museumID"] = 126
        item['exhibitionTime'] = entry.xpath("./dt/span/text()").extract_first()
        item["exhibitionTheme"] = entry.xpath("./dt/a/text()").extract_first()
        intro = entry.xpath(
            "normalize-space(./dd[2]/text()[1])").extract_first(default='')
        item['exhibitionIntroduction'] = intro.replace(u'\u3000', u'')
        # A missing src used to be stored as the string "None"; use '' like
        # the sibling spiders do for "no picture".
        item["exhibition_picture"] = entry.xpath(
            "./dd[1]/a/img/@src").extract_first(default='')
        yield item
def parse(self, response):
    """Yield one ``exhibition75Item`` per ``<div>`` node matched in *response*.

    Every item gets ``museumID`` 108, the whitespace-normalized string value
    of the nested title ``<div>`` as ``exhibitionTheme``, an empty
    ``exhibitionIntroduction`` and the image ``src`` prefixed with
    ``http://www.ynmuseum.org`` as ``exhibition_picture`` (an empty string
    when the node has no image).
    """
    cards = response.xpath("/html//div/div[3]/div[1]/div/div[3]/div[1]/div")
    for card in cards:
        # A fresh item per node: re-yielding one shared instance would make
        # every yielded object the same object, overwritten by later nodes.
        item = exhibition75Item()
        item["museumID"] = 108
        item["exhibitionTheme"] = card.xpath(
            "normalize-space(./div[2]/div/div[1])").extract_first()
        item["exhibitionIntroduction"] = ''
        src = card.xpath('./div[1]/a/img/@src').extract_first()
        # extract_first() gives None when nothing matches; concatenating it
        # would raise TypeError and abort the remaining nodes.
        item["exhibition_picture"] = (
            '' if src is None else 'http://www.ynmuseum.org' + src
        )
        yield item
def parse(self, response):
    """Yield one ``exhibition75Item`` per ``<div>`` node matched in *response*.

    Every item gets ``museumID`` 107, a whitespace-normalized
    ``exhibitionTheme`` and ``exhibitionIntroduction`` (non-breaking spaces
    replaced by plain spaces) and an empty ``exhibition_picture``.
    """
    cards = response.xpath("/html//div[4]/div/div/div")
    for card in cards:
        # normalize-space() evaluates to a string, so default='' only guards
        # against an unexpected empty result.
        theme = card.xpath(
            "normalize-space(./a/span/text())").extract_first(default='')
        intro = card.xpath(
            "normalize-space(./div/div/div/div/div[1]/div[2]/div/text())"
        ).extract_first(default='')

        # A fresh item per node: re-yielding one shared instance would make
        # every yielded object the same object, overwritten by later nodes.
        item = exhibition75Item()
        item["museumID"] = 107
        # Replace both the literal text "\xa0" and the real NBSP character.
        item["exhibitionTheme"] = theme.replace(
            u'\\xa0', u' ').replace(u'\xa0', u' ')
        item["exhibitionIntroduction"] = intro.replace(
            u'\\xa0', u' ').replace(u'\xa0', u' ')
        item["exhibition_picture"] = ''
        yield item