def parse(self, response):
    """Extract the first two addresses from the response's ``origin`` field.

    The response body is decoded as JSON; its ``origin`` value is split on
    commas, the first entry is stored under ``res_ip`` and the second under
    ``agent_ip`` (both stripped of surrounding whitespace). The raw body and
    both addresses are printed.

    Args:
        response: Object exposing the response body as ``response.text``.

    Yields:
        A single ``ProxyspiderItem`` with ``res_ip`` and ``agent_ip`` set.

    Raises:
        ValueError: If the body is not valid JSON (``json.JSONDecodeError``).
        KeyError: If the decoded object has no ``origin`` key.
        IndexError: If ``origin`` contains fewer than two comma-separated
            addresses.
    """
    # Imported locally so this block does not rely on a module-level import.
    import json

    item = ProxyspiderItem()
    print(response.text)

    # SECURITY: the body comes from a remote server, so it is parsed as JSON
    # instead of being passed to eval(), which would execute any expression
    # the server (or an intermediary) chose to send.
    payload = json.loads(response.text)
    addresses = payload['origin'].split(',')

    item['res_ip'] = addresses[0].strip()
    item['agent_ip'] = addresses[1].strip()
    print(item['res_ip'])
    print(item['agent_ip'])
    yield item
def parse(self, response):
    """Yield an item with ``ip`` and ``port`` for each table row after the first.

    The response body is parsed as HTML. For every ``<tr>`` under a
    ``<table>`` except the first one, the cells after the first column are
    selected; the text of the first two of those cells becomes ``ip`` and
    ``port``.

    Args:
        response: Object exposing the raw body as ``response.body``.

    Yields:
        ``ProxyspiderItem`` instances built with ``ip`` and ``port``.

    Rows that raise while being processed are reported with ``print`` and
    skipped; the remaining rows are still handled.
    """
    document = etree.HTML(response.body)
    for row in document.xpath('//table/tr[position()>1]'):
        try:
            # Cells after the first column of the row.
            cells = row.xpath('./td[position()>1]')
            yield ProxyspiderItem(ip=cells[0].text, port=cells[1].text)
        except Exception as e:
            # Best effort: report the problem and continue with the next row.
            print(e)
def parse(self, response):
    """Yield an item with ``ip`` and ``port`` for each ``<tbody>`` row.

    The response body is parsed as HTML. In every ``<tr>`` under a
    ``<tbody>``, the first ``<td data-title="IP">`` and the first
    ``<td data-title="PORT">`` supply the values. Each pair is also printed.

    Args:
        response: Object exposing the raw body as ``response.body``.

    Yields:
        ``ProxyspiderItem`` instances built with ``ip`` and ``port``.

    Raises:
        IndexError: If a row has no matching IP or PORT cell.
    """
    root = etree.HTML(response.body)
    for row in root.xpath('//tbody/tr'):
        ip_cells = row.xpath('.//td[@data-title="IP"]')
        port_cells = row.xpath('.//td[@data-title="PORT"]')

        ip_text = ip_cells[0].text
        port_text = port_cells[0].text

        print("IP;{0},PORT:{1}".format(ip_text, port_text))
        yield ProxyspiderItem(ip=ip_text, port=port_text)
def parse(self, response):
    """Yield title, type and time for at most the first ten movie entries.

    Entries are the ``<div class="movie-item-hover">`` nodes of the response.
    For each one, three text nodes are read by position and stripped.

    Args:
        response: Response object accepted by ``Selector``.

    Yields:
        dict: ``{'title': ..., 'type': ..., 'time': ...}`` for each entry.

    Raises:
        IndexError: If an entry lacks one of the expected text nodes.
    """
    movies = Selector(response).xpath('//div[@class="movie-item-hover"]')

    # Slicing caps the output at ten entries without a hand-kept counter.
    for movie in movies[:10]:
        title = movie.xpath(
            './a/div/div[1]/span[1]/text()').extract()[0].strip()
        # Index 2 picks the third text node under the block.
        # NOTE(review): presumably the value following a label node; confirm
        # against the page markup.
        category = movie.xpath('./a/div/div[2]//text()').extract()[2].strip()
        release = movie.xpath('./a/div/div[4]//text()').extract()[2].strip()

        # The result is a plain dict, so no ProxyspiderItem is instantiated.
        yield {'title': title, 'type': category, 'time': release}
def parse(self, response):
    """Yield name, categories and planned time for up to ten movie blocks.

    Prints the response URL, then walks the
    ``<div class='movie-hover-info'>`` nodes and stops after the tenth item
    has been yielded.

    Args:
        response: Response object accepted by ``Selector`` and exposing
            ``response.url``.

    Yields:
        ``ProxyspiderItem`` instances with ``movie_name``,
        ``movie_categorys`` and ``plan_time`` set.

    Raises:
        IndexError: If a block has fewer than two text nodes in its second
            or fourth ``<div>``.
    """
    print(response.url)
    hover_blocks = Selector(response=response).xpath(
        "//div[@class='movie-hover-info']")

    for position, block in enumerate(hover_blocks, start=1):
        item = ProxyspiderItem()

        name = block.xpath('./div[1]/span[1]/text()').get()
        # The second text node of the div is the one that is kept.
        categories = block.xpath('./div[2]/text()').getall()[1].strip()
        planned = block.xpath('./div[4]/text()').getall()[1].strip()

        item['movie_name'] = name
        item['movie_categorys'] = categories
        item['plan_time'] = planned
        yield item

        # Stop once ten items have been produced.
        if position >= 10:
            break
def parse(self, response):
    """Yield title, link, time and category for at most ten movie entries.

    Entries are the ``<div class="movie-item-hover">`` nodes found under
    ``//dd/div``. Only the first ten are processed; nothing is yielded for
    the rest.

    Args:
        response: Response object accepted by ``Selector``.

    Yields:
        ``ProxyspiderItem`` instances with ``title``, ``link``, ``time`` and
        ``category`` set. ``link`` is the entry's ``href`` prefixed with
        ``https://maoyan.com``.

    Raises:
        IndexError: If an entry has fewer than two text nodes in its second
            or fourth inner ``<div>``.
        TypeError: If an entry has no ``movie-click`` anchor ``href`` (the
            extracted link is ``None`` and cannot be concatenated).
    """
    max_items = 10
    div_tags = Selector(response=response).xpath(
        '//dd/div/div[@class="movie-item-hover"]')

    # Slicing enforces the limit exactly: a ``counter <= 10`` test starting
    # from zero would let an eleventh entry through, and a bare ``yield`` for
    # the remainder would emit ``None`` values downstream.
    for div_tag in div_tags[:max_items]:
        title = div_tag.xpath(
            './a/div/div/span[@class="name "]/text()').extract_first()
        link = div_tag.xpath(
            './a[@data-act="movie-click"]/@href').extract_first()
        category = div_tag.xpath(
            './a/div/div[2]/text()').extract()[1].strip('\n').strip()
        # Named ``release_time`` to avoid shadowing the stdlib ``time`` module.
        release_time = div_tag.xpath(
            './a/div/div[4]/text()').extract()[1].strip('\n').strip()

        item = ProxyspiderItem()
        item['title'] = title
        item['link'] = 'https://maoyan.com' + link
        item['time'] = release_time
        item['category'] = category
        yield item
def parse(self, response):
    """Yield name, score, type, actor and time for up to ten movie blocks.

    Blocks are the ``<div class="movie-hover-info">`` nodes of the response;
    only the first ten are processed.

    Args:
        response: Response object accepted by ``Selector``.

    Yields:
        ``ProxyspiderItem`` instances with ``movie_name``, ``movie_score``,
        ``movie_type``, ``movie_actor`` and ``movie_time`` set.
        ``movie_score`` falls back to ``'暂无'`` when no score node matches.

    Raises:
        IndexError: If one of the three sibling ``<div>`` nodes has fewer
            than two text nodes.
    """

    def second_text(block, path):
        """Return the second text node matched by *path*, as a stripped str."""
        return str(block.xpath(path).getall()[1]).strip()

    info_blocks = Selector(response=response).xpath(
        '//div[@class="movie-hover-info"]')

    for block in info_blocks[:10]:
        item = ProxyspiderItem()
        item['movie_name'] = block.xpath('./div/span/text()').get()

        score_node = block.xpath(
            './div/span[@class="score channel-detail-orange"]')
        item['movie_score'] = score_node.xpath('string(.)').get(default='暂无')

        item['movie_type'] = second_text(
            block, './div[1]/following-sibling::div[1]/text()')
        item['movie_actor'] = second_text(
            block, './div[1]/following-sibling::div[2]/text()')
        item['movie_time'] = second_text(
            block, './div[1]/following-sibling::div[3]/text()')
        yield item
def parse(self, response):
    """Yield one item whose ``ip`` is the ``origin`` value of the JSON body.

    Args:
        response: Object exposing the response body as ``response.text``.

    Yields:
        A single ``ProxyspiderItem`` with ``ip`` set.

    Raises:
        ValueError: If the body is not valid JSON (``json.JSONDecodeError``).
        KeyError: If the decoded object has no ``origin`` key.
    """
    item = ProxyspiderItem()
    payload = json.loads(response.text)
    item['ip'] = payload['origin']
    yield item
def parse(self, response):
    """Return a one-element list holding the response body as ``ipaddr``.

    Args:
        response: Object exposing the response body as ``response.text``.

    Returns:
        list: A list containing a single ``ProxyspiderItem`` whose ``ipaddr``
        field is the unmodified response text.
    """
    item = ProxyspiderItem()
    item['ipaddr'] = response.text
    return [item]