Example No. 1
 def parse(self, response):
     soup = bs(response.text, 'html.parser')
     print(soup.text)
     item = MaoyanspidersItem()
     item['name'] = 'name'  # placeholder value
     return item
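All of the excerpts in this listing assume a few imports and an item class that the snippets themselves never show (bs, scrapy, MaoyanspidersItem, Selector). A minimal sketch of those supporting pieces, assuming a standard Scrapy project layout; the field set is collected from the examples below and everything else is a guess:

    # Hypothetical supporting code assumed by the excerpts; not shown in the source.
    import scrapy
    from bs4 import BeautifulSoup as bs       # the snippets refer to BeautifulSoup as "bs"
    from scrapy.selector import Selector      # used by Examples No. 12 and No. 13

    class MaoyanspidersItem(scrapy.Item):
        # field names are the ones the excerpts assign to; the real items.py may differ
        films_name = scrapy.Field()     # film title
        release_time = scrapy.Field()   # release date text
        films_type = scrapy.Field()     # genre, filled in on the detail page
        name = scrapy.Field()           # some excerpts use these alternative names
        movie_name = scrapy.Field()
        movie_type = scrapy.Field()
        movie_time = scrapy.Field()
        movieName = scrapy.Field()
        movieType = scrapy.Field()
        movieTime = scrapy.Field()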
Example No. 2
 def parse(self, response):
     soup = bs(response.text, 'html.parser')
     print(soup.text)
     for i in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         item = MaoyanspidersItem()
         # the detail-page link sits on the <a> tag inside the current block
         title = i.find('p', attrs={'class': 'name'}).find('a')
         link = 'https://maoyan.com' + title.get('href')
Example No. 3
    def parse(self, response):
        soup = bs(response.text, 'html.parser')
        for i in soup.find_all('div', attrs={'class': 'movie-item-info'}):
            item = MaoyanspidersItem()
            title = i.find('p', attrs={'class': 'name'}).find('a')
            name = title.get('title')
            link = 'https://maoyan.com' + title.get('href')
            time = i.find('p', attrs={'class': 'releasetime'}).text
            item['films_name'] = name
            item['release_time'] = time
            print(link)
            yield scrapy.Request(url=link, headers=self.header, meta={'item': item}, callback=self.parse1)


    def parse1(self, response):
        item = response.meta['item']
        soup = bs(response.text, 'html.parser')
        # the first <li> inside the banner block holds the film type
        films_type = soup.find('div', attrs={'class': 'banner'}).find_all('li')[0].text.replace('\n', ' ')
        item['films_type'] = films_type
        print(item)
        yield item
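The two callbacks above (parse for the listing page, parse1 for a film's detail page) also reference self.header, so they are assumed to sit inside a spider class roughly like the sketch below; the spider name, start URL and header value are assumptions, not taken from the source:

    import scrapy
    from bs4 import BeautifulSoup as bs

    class MaoyanSpider(scrapy.Spider):
        name = 'maoyanspiders'                       # assumed spider name
        start_urls = ['https://maoyan.com/films']    # assumed listing-page URL
        # self.header is presumably a browser-like User-Agent; the value is a placeholder
        header = {'User-Agent': 'Mozilla/5.0'}

        def parse(self, response):
            # listing page: yield scrapy.Request(..., meta={'item': item}, callback=self.parse1)
            ...

        def parse1(self, response):
            # detail page: read response.meta['item'], add films_type, yield the item
            ...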
Example No. 4
 def parse(self, response):
     soup = bs(response.text, 'html.parser')
     for i in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         item = MaoyanspidersItem()
         title = i.find('p', attrs={'class': 'name'}).find('a')
         name = title.get('title')
         link = 'https://maoyan.com' + title.get('href')
         time = i.find('p', attrs={'class': 'releasetime'}).text
         item['films_name'] = name
         item['release_time'] = time
         print(link)
         yield scrapy.Request(url=link,
                              headers=self.header,
                              meta={'item': item},
                              callback=self.parse1)
Example No. 5
 def parse(self, response):
     soup = bs(response.text, 'html.parser')
     print(soup.text)
     item = MaoyanspidersItem()
     item['films_name'] = 'name'    # placeholder value
     item['release_time'] = 'time'  # placeholder value
     return item
Example No. 6
 def parse(self, response):
     soup = bs(response.text, 'html.parser')
     print(soup.text)
     item = MaoyanspidersItem()
     item['films_name'] = 'name'    # placeholder value
     item['release_time'] = 'time'  # placeholder value
     yield item
Example No. 7
 def parse(self, response):
     soup = bs(response.text, 'html.parser')
     print(soup.text)
     for i in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         item = MaoyanspidersItem()
         title = i.find('p', attrs={'class': 'name'}).find('a')
         link = 'https://maoyan.com' + title.get('href')
         item['films_name'] = 'name'    # placeholder value
         item['release_time'] = 'time'  # placeholder value
Example No. 8
 def parse(self, response):
     soup = bs(response.text, 'html.parser')
     print(soup.text)
     for i in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         item = MaoyanspidersItem()
         title = i.find('p', attrs={'class': 'name'}).find('a')
         link = 'https://maoyan.com' + title.get('href')
         item['films_name'] = title.get('title')
         item['release_time'] = i.find('p', attrs={'class': 'releasetime'}).text
         yield scrapy.Request(url=link, meta={'item': item}, callback=self.parse1)
Example No. 9
 def parse(self, response):
     soup = bs(response.text, 'html.parser')
     print(soup.text)
     for i in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         item = MaoyanspidersItem()
         # the href sits on the <a> tag inside the block, not on the <div> itself
         link = 'https://maoyan.com' + i.find('a').get('href')
         item['films_name'] = 'name'    # placeholder value
         item['release_time'] = 'time'  # placeholder value
         yield scrapy.Request(url=link, meta={'item': item}, callback=self.parse1)
Example No. 10
 def parse(self, response):
     soup = bs(response.text, 'html.parser')
     for i in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         item = MaoyanspidersItem()
         title = i.find('p', attrs={'class': 'name'}).find('a')
         name = title.get('title')
         link = 'https://maoyan.com/' + title.get('href')
         time = i.find('p', attrs={'class': 'releasetime'}).text
         item['films_name'] = name
         item['release_time'] = time
         yield scrapy.Request(url=link, meta={'item': item}, callback=self.parse1)
Example No. 11
 def parse(self, response):
     soup = bs(response.text, 'html.parser')
     print(soup.text)
     for i in soup.find_all('div', attrs={'class': 'movie-item-info'}):
         item = MaoyanspidersItem()
         title = i.find('p', attrs={'class': 'name'}).find('a')
         link = 'https://maoyan.com' + title.get('href')
         item['films_name'] = 'name'    # placeholder value
         item['release_time'] = 'time'  # placeholder value
         yield scrapy.Request(url=link, meta={'item': item}, callback=self.parse1)
Example No. 12
 def parse2(self, response):
     item = MaoyanspidersItem()
     item['movie_name'] = Selector(response=response).xpath(
         '/html/body/div[3]/div/div[2]/div[1]/h1/text()').extract()[0]
     list_type = Selector(response=response).xpath(
         '/html/body/div[3]/div/div[2]/div[1]/ul/li[1]/a/text()').extract()
     item['movie_type'] = ""
     for i in list_type:
         item['movie_type'] = item['movie_type'] + i + " "
     item['movie_time'] = Selector(response=response).xpath(
         '/html/body/div[3]/div/div[2]/div[1]/ul/li[3]/text()').extract()[0]
     return item
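The loop in parse2 that concatenates list_type works, but it leaves a trailing space; str.join builds the same string more idiomatically. A tiny sketch with made-up values:

    list_type = ['Drama', 'Romance']    # illustrative values only
    movie_type = " ".join(list_type)    # 'Drama Romance', no trailing space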
Example No. 13
    def parse(self, response):
        items = []
        # print the page URL
        print(response.url)

        movies = Selector(response=response).xpath('//div[@class="movie-hover-info"]')
        moviecount = len(movies)
        moviebriefs = Selector(response=response).xpath('//div[@class="movie-hover-title movie-hover-brief"]')

        for i in range(moviecount):
            item = MaoyanspidersItem()
            # relative XPath: './' starts from the current node ('/', '.' and '..' have different meanings)
            movieName = moviebriefs[i].xpath('./a/@title').extract_first()
            movieType = moviebriefs[i].xpath('./a/text()').extract_first()
            movieTime = movies[i].xpath('./div[@class="movie-hover-title"]/text()').extract_first()

            item['movieName'] = movieName
            item['movieType'] = movieType
            item['movieTime'] = movieTime
            items.append(item)
        return items
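One thing worth noting about the last example: xpath() on a Scrapy Selector returns a SelectorList rather than a string, which is why the loop above calls extract_first() before storing values in the item. A quick standalone check, using made-up HTML:

    from scrapy.selector import Selector

    sel = Selector(text='<div><a title="Nezha">Nezha</a></div>')
    print(sel.xpath('//a/@title'))                  # a SelectorList, e.g. [<Selector ... data='Nezha'>]
    print(sel.xpath('//a/@title').extract_first())  # 'Nezha'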