Exemplo n.º 1
0
 def parse(self, response):
     """Turn a listing page into one detail-page request per movie card.

     Each ``div.movie-item-info`` card supplies the film title, the
     release-time text and the detail link. The partially filled item
     travels with the request in ``meta['item']`` so ``self.parse1`` can
     finish populating it.

     NOTE(review): the selectors (``p.name > a`` and ``p.releasetime``
     inside each card) are assumed from the listing markup -- confirm
     against the live page.

     Args:
         response: Response whose ``text`` holds the listing HTML.

     Yields:
         scrapy.Request: one request per card that exposes a detail link.
     """
     soup = bs(response.text, 'html.parser')
     for card in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         name_tag = card.find('p', attrs={'class': 'name'})
         anchor = name_tag.find('a') if name_tag is not None else None
         href = anchor.get('href') if anchor is not None else None
         if not href:
             # Without a detail link there is nothing to request.
             continue
         release_tag = card.find('p', attrs={'class': 'releasetime'})

         item = MaoyanspidersItem()
         item['films_name'] = anchor.get('title')
         item['release_time'] = (
             release_tag.text if release_tag is not None else None)
         # Resolve relative hrefs against the page that was just fetched.
         yield scrapy.Request(
             url=response.urljoin(href),
             meta={'item': item},
             callback=self.parse1,
         )
Exemplo n.º 2
0
 def parse(self, response):
     """Schedule a detail-page request for every movie card on the page.

     Walks each ``div.movie-item-info`` card, reads the title and detail
     link from the anchor under ``p.name`` and the release text from
     ``p.releasetime``, and hands the partially filled item to
     ``self.parse1`` through ``meta['item']``.

     NOTE(review): the anchor/release selectors are assumed from the
     listing markup -- confirm against the live page.

     Args:
         response: Response whose ``text`` holds the listing HTML.

     Yields:
         scrapy.Request: one request per card that exposes a detail link.
     """
     soup = bs(response.text, 'html.parser')
     for card in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         name_tag = card.find('p', attrs={'class': 'name'})
         anchor = name_tag.find('a') if name_tag is not None else None
         if anchor is None or not anchor.get('href'):
             # The card itself carries no href; skip cards without a link.
             continue
         release_tag = card.find('p', attrs={'class': 'releasetime'})

         item = MaoyanspidersItem()
         item['films_name'] = anchor.get('title')
         item['release_time'] = (
             release_tag.text if release_tag is not None else None)
         # Resolve the (possibly relative) href against the current page.
         link = response.urljoin(anchor.get('href'))
         yield scrapy.Request(
             url=link, meta={'item': item}, callback=self.parse1)
Exemplo n.º 3
0
 def parse(self, response):
     """Schedule a detail-page request for every movie card on the page.

     For each ``div.movie-item-info`` card the title and detail link are
     read from the anchor under ``p.name`` and the release text from
     ``p.releasetime``. The partially filled item is forwarded to
     ``self.parse1`` via ``meta['item']``.

     NOTE(review): the anchor/release selectors are assumed from the
     listing markup -- confirm against the live page.

     Args:
         response: Response whose ``text`` holds the listing HTML.

     Yields:
         scrapy.Request: one request per card that exposes a detail link.
     """
     from urllib.parse import urljoin

     soup = bs(response.text, 'html.parser')
     for card in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         name_tag = card.find('p', attrs={'class': 'name'})
         title = name_tag.find('a') if name_tag is not None else None
         href = title.get('href') if title is not None else None
         if not href:
             # No anchor or no href: nothing to follow for this card.
             continue
         release_tag = card.find('p', attrs={'class': 'releasetime'})

         item = MaoyanspidersItem()
         item['films_name'] = title.get('title')
         item['release_time'] = (
             release_tag.text if release_tag is not None else None)
         # urljoin avoids a doubled "/" when the href is root-relative.
         link = urljoin('https://maoyan.com/', href)
         yield scrapy.Request(
             url=link, meta={'item': item}, callback=self.parse1)
Exemplo n.º 4
0
 def parse(self, response):
     """Schedule a detail-page request for every movie card on the page.

     For each ``div.movie-item-info`` card the title and detail link are
     read from the anchor under ``p.name`` and the release text from
     ``p.releasetime``. The partially filled item is forwarded to
     ``self.parse1`` via ``meta['item']``.

     Cards missing the anchor or its ``href`` are skipped instead of
     raising ``AttributeError`` / ``TypeError``; a missing release
     paragraph leaves ``release_time`` as ``None``.

     Args:
         response: Response whose ``text`` holds the listing HTML.

     Yields:
         scrapy.Request: one request per card that exposes a detail link.
     """
     from urllib.parse import urljoin

     soup = bs(response.text, 'html.parser')
     for card in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         name_tag = card.find('p', attrs={'class': 'name'})
         title = name_tag.find('a') if name_tag is not None else None
         href = title.get('href') if title is not None else None
         if not href:
             # No anchor or no href: nothing to follow for this card.
             continue
         release_tag = card.find('p', attrs={'class': 'releasetime'})

         item = MaoyanspidersItem()
         item['films_name'] = title.get('title')
         item['release_time'] = (
             release_tag.text if release_tag is not None else None)
         # Plain concatenation with the trailing-slash base produced
         # "https://maoyan.com//..." for root-relative hrefs.
         link = urljoin('https://maoyan.com/', href)
         yield scrapy.Request(
             url=link, meta={'item': item}, callback=self.parse1)
Exemplo n.º 5
0
 def parse2(self, response):
     """Build a ``MaoyanspidersItem`` from a movie detail response.

     Three absolute XPath queries fill the item:

     * ``movie_name`` -- first text node of the ``h1`` heading.
     * ``movie_type`` -- every matched link text, each followed by a
       single space (so a non-empty result ends with a space).
     * ``movie_time`` -- first text node of the third ``li``.

     Args:
         response: Response wrapped in a ``Selector`` for querying.

     Returns:
         The populated ``MaoyanspidersItem``.

     Raises:
         IndexError: if the name or time query matches nothing.
     """
     page = Selector(response=response)
     result = MaoyanspidersItem()

     result['movie_name'] = page.xpath(
         '/html/body/div[3]/div/div[2]/div[1]/h1/text()').extract()[0]

     type_labels = page.xpath(
         '/html/body/div[3]/div/div[2]/div[1]/ul/li[1]/a/text()').extract()
     # Keep the trailing space after every label, including the last one.
     result['movie_type'] = "".join(label + " " for label in type_labels)

     result['movie_time'] = page.xpath(
         '/html/body/div[3]/div/div[2]/div[1]/ul/li[3]/text()').extract()[0]
     return result
Exemplo n.º 6
0
 def parse(self, response):
     """Schedule a detail-page request for every movie card on the page.

     For each ``div.movie-item-info`` card the title and detail link are
     read from the anchor under ``p.name`` and the release text from
     ``p.releasetime``. The partially filled item is forwarded to
     ``self.parse1`` via ``meta['item']``, and every request is sent with
     ``self.header`` as its headers.

     Cards missing the anchor or its ``href`` are skipped instead of
     raising; a missing release paragraph leaves ``release_time`` as
     ``None``.

     Args:
         response: Response whose ``text`` holds the listing HTML.

     Yields:
         scrapy.Request: one request per card that exposes a detail link.
     """
     from urllib.parse import urljoin

     soup = bs(response.text, 'html.parser')
     for card in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         name_tag = card.find('p', attrs={'class': 'name'})
         title = name_tag.find('a') if name_tag is not None else None
         href = title.get('href') if title is not None else None
         if not href:
             # No anchor or no href: nothing to follow for this card.
             continue
         release_tag = card.find('p', attrs={'class': 'releasetime'})

         item = MaoyanspidersItem()
         item['films_name'] = title.get('title')
         item['release_time'] = (
             release_tag.text if release_tag is not None else None)
         # urljoin copes with hrefs both with and without a leading "/".
         link = urljoin('https://maoyan.com', href)
         print(link)
         yield scrapy.Request(
             url=link,
             headers=self.header,
             meta={'item': item},
             callback=self.parse1,
         )
Exemplo n.º 7
0
    def parse(self, response):
        """Collect one item per movie hover card found in the response.

        Two node lists are selected from the page -- the
        ``movie-hover-info`` cards and the
        ``movie-hover-title movie-hover-brief`` blocks -- and paired by
        position. Each pair fills one ``MaoyanspidersItem``.

        NOTE(review): the three fields are stored as the raw results of
        ``.xpath(...)`` (selector lists), not extracted strings --
        confirm whether downstream code expects text and add an
        extraction step if so.

        Args:
            response: Response wrapped in a ``Selector`` for querying.

        Returns:
            list: the items built from the paired nodes; empty when
            either selection matches nothing.
        """
        # Print the URL of the page being parsed.
        print(response.url)

        page = Selector(response=response)
        movies = page.xpath('//div[@class="movie-hover-info"]')
        # The predicate must name the attribute and open its quote;
        # '[@movie-hover-title movie-hover-brief"]' is not valid XPath.
        moviebriefs = page.xpath(
            '//div[@class="movie-hover-title movie-hover-brief"]')

        items = []
        # zip() stops at the shorter list, so a page with fewer brief
        # blocks than cards no longer raises IndexError.
        for movie, brief in zip(movies, moviebriefs):
            item = MaoyanspidersItem()
            # In XPath, "/" starts at the document root, "." is the
            # current node and ".." is its parent.
            item['movieName'] = brief.xpath('./a/@title')
            item['movieType'] = brief.xpath('./a/text()')
            item['movieTime'] = movie.xpath(
                './div[@class="movie-hover-title"]/text()')
            items.append(item)
        return items