def parseTencent(self, response):
    """Build a single ``TencentItem`` from ``response``.

    Every field is the first string matched by its XPath, evaluated
    against the whole response.  The position category is not extracted.

    Yields:
        One ``TencentItem`` with ``name``, ``work_place``, ``need_num``
        and ``position_duty`` set.

    Raises:
        IndexError: if any of the XPaths matches nothing.
    """

    def first_match(query):
        # First string selected by ``query``; [0] fails on an empty result.
        return response.xpath(query).extract()[0]

    item = TencentItem()
    # Position name
    item['name'] = first_match('//tr/td[@id="sharetitle"]/text()')
    # Work location
    item['work_place'] = first_match(
        '//tr[@class="c bottomline"]/td[1]/text()')
    # Number of openings
    item['need_num'] = first_match(
        '//tr[@class="c bottomline"]/td[3]/text()')
    # Job duties: only the first matching <li> text is kept.
    item['position_duty'] = first_match('//td[@class="l2"]//li/text()')
    yield item
def parseTencent(self, response):
    """Yield one ``TencentItem`` per ``even``/``odd`` table row.

    The position type falls back to an empty string when its cell has no
    text; every other cell is indexed directly.

    Yields:
        A ``TencentItem`` for each matched ``<tr>``.

    Raises:
        IndexError: if a row lacks the name, link, head-count, location
            or publish-time value.
    """
    rows = response.xpath("//tr[@class='even'] | //tr[@class='odd']")
    for row in rows:
        item = TencentItem()
        item['positionName'] = row.xpath("./td[1]/a/text()").extract()[0]
        relative_link = row.xpath("./td[1]/a/@href").extract()[0]
        item['positionLink'] = "https://hr.tencent.com/" + relative_link

        # The only guarded cell: use '' when there is no text node.
        type_texts = row.xpath("./td[2]/text()").extract()
        if type_texts:
            item['positionType'] = type_texts[0]
        else:
            item['positionType'] = ''

        item['peopleNum'] = row.xpath("./td[3]/text()").extract()[0]
        item['workLocation'] = row.xpath("./td[4]/text()").extract()[0]
        item['publishTime'] = row.xpath("./td[5]/text()").extract()[0]
        yield item
def parse_item(self, response):
    """Yield one empty ``TencentItem`` per ``a.recruit-list-link`` element.

    The response body is printed first as a debugging aid.  No item
    fields are populated yet (see the TODO below).

    Yields:
        A fresh ``TencentItem`` for every matched link, so consumers never
        share one mutable item object.
    """
    # Single-argument, parenthesised print behaves the same as a Python 2
    # statement and as a Python 3 function call.
    print('----------------------------------')
    # ``response.text`` is a string attribute, not a method; calling it
    # raised TypeError before any item could be yielded.
    print(response.text)

    for _link in response.xpath("//a[@class='recruit-list-link']"):
        # TODO(review): the position name lives at
        #   ./div[@class='recruit-title']/text()
        # relative to each link.  The previous code extracted it into an
        # unused local; store it on the item once the matching TencentItem
        # field name is confirmed.
        yield TencentItem()
def parse_item(self, response):
    """Yield one ``TencentItem`` per ``even``/``odd`` table row.

    Yields:
        A ``TencentItem`` with ``positionname``, ``positionlink``,
        ``positionType``, ``positionNum``, ``workLocation`` and
        ``publishTime`` set.

    Raises:
        IndexError: if a row lacks the name, link, head-count, location
            or publish-time value.
    """
    for row in response.xpath("//tr[@class='even'] | //tr[@class='odd']"):
        item = TencentItem()
        # Position name
        item['positionname'] = row.xpath("./td[1]/a/text()").extract()[0]
        # Detail link (the href is appended to the site root)
        href = row.xpath("./td[1]/a/@href").extract()[0]
        item['positionlink'] = "https://hr.tencent.com/" + href
        # Position category.  An empty result used to raise IndexError,
        # which ends this generator and loses the remaining rows of the
        # page; fall back to '' instead.
        # NOTE(review): presumably this is the one optional cell - confirm.
        categories = row.xpath("./td[2]/text()").extract()
        item['positionType'] = categories[0] if categories else ''
        # Number of openings
        item['positionNum'] = row.xpath("./td[3]/text()").extract()[0]
        # Work location
        item['workLocation'] = row.xpath("./td[4]/text()").extract()[0]
        # Publish date
        item['publishTime'] = row.xpath("./td[5]/text()").extract()[0]
        yield item
def parseTencent(self, response):
    """Yield one ``TencentItem`` per ``even``/``odd`` table row.

    Yields:
        A ``TencentItem`` with ``positionname``, ``positionlink``,
        ``positionType``, ``peopleNum``, ``workLocation`` and
        ``publishTime`` set.

    Raises:
        IndexError: if a row lacks the name, link, head-count, location
            or publish-time value.
    """
    for row in response.xpath("//tr[@class='even'] | //tr[@class='odd']"):
        item = TencentItem()
        # Position name
        item['positionname'] = row.xpath("./td[1]/a/text()").extract()[0]
        # Detail link (stored exactly as found in the href)
        item['positionlink'] = row.xpath("./td[1]/a/@href").extract()[0]
        # Position category.  Indexing an empty result raised IndexError
        # and stopped the generator mid-page, so later rows were lost;
        # use '' when the cell has no text node.
        # NOTE(review): presumably this is the one optional cell - confirm.
        category_texts = row.xpath("./td[2]/text()").extract()
        item['positionType'] = category_texts[0] if category_texts else ''
        # Number of openings
        item['peopleNum'] = row.xpath("./td[3]/text()").extract()[0]
        # Work location
        item['workLocation'] = row.xpath("./td[4]/text()").extract()[0]
        # Publish date
        item['publishTime'] = row.xpath("./td[5]/text()").extract()[0]
        yield item
def parseTencent(self, response):
    """Yield one ``TencentItem`` per ``even``/``odd`` table row.

    Yields:
        A ``TencentItem`` with ``sitionname``, ``positionlink``,
        ``positiontype``, ``perpleNum``, ``workLocation`` and
        ``publishTime`` set; each item is handed on for pipeline
        processing.

    Raises:
        IndexError: if a row lacks the name, link, head-count, location
            or publish-time value.
    """
    # NOTE(review): the keys "sitionname" and "perpleNum" look misspelled
    # but are kept as-is; they have to match the TencentItem field
    # declarations, which are not visible from here.
    for row in response.xpath('//tr[@class="even"] | //tr[@class="odd"]'):
        # Fresh item object for every row
        item = TencentItem()
        # Position name
        item["sitionname"] = row.xpath("./td[1]/a/text()").extract()[0]
        # Detail link
        item["positionlink"] = row.xpath("./td[1]/a/@href").extract()[0]
        # Category.  An empty result used to raise IndexError and end the
        # generator, dropping the rest of the page; fall back to ''.
        # NOTE(review): presumably this is the one optional cell - confirm.
        type_texts = row.xpath("./td[2]/text()").extract()
        item["positiontype"] = type_texts[0] if type_texts else ''
        # Number of openings
        item["perpleNum"] = row.xpath("./td[3]/text()").extract()[0]
        # Work location
        item["workLocation"] = row.xpath("./td[4]/text()").extract()[0]
        # Publish date
        item["publishTime"] = row.xpath("./td[5]/text()").extract()[0]
        # Hand the item over to the pipelines
        yield item
def parseTencent(self, response):
    """Yield one ``TencentItem`` per ``even``/``odd`` table row.

    Yields:
        A ``TencentItem`` with ``positionName``, ``positionLink``,
        ``positionType``, ``peopleNum``, ``workLocation`` and
        ``publishTime`` set.

    Raises:
        IndexError: if a row lacks the name, link, head-count, location
            or publish-time value.
    """
    # ``xpath`` returns a list of selectors, one per matched row.
    for row in response.xpath("//tr[@class='even'] | //tr[@class='odd']"):
        item = TencentItem()
        # Position name
        item['positionName'] = row.xpath("./td[1]/a/text()").extract()[0]
        # Detail link
        item['positionLink'] = row.xpath("./td[1]/a/@href").extract()[0]
        # Position type.  ``extract()[0]`` on an empty result raised
        # IndexError, which terminates the generator and discards the
        # remaining rows; use '' when the cell has no text node.
        # NOTE(review): presumably this is the one optional cell - confirm.
        type_texts = row.xpath("./td[2]/text()").extract()
        item['positionType'] = type_texts[0] if type_texts else ''
        # Number of openings
        item['peopleNum'] = row.xpath("./td[3]/text()").extract()[0]
        # Work location
        item['workLocation'] = row.xpath("./td[4]/text()").extract()[0]
        # Publish date
        item['publishTime'] = row.xpath("./td[5]/text()").extract()[0]
        yield item
def parseTencent(self, response):
    """Yield one ``TencentItem`` per ``even``/``odd`` table row.

    Yields:
        A ``TencentItem`` with ``positionName``, ``positionLink``,
        ``positionType``, ``positionNum``, ``workLocation`` and
        ``publishTime`` set.

    Raises:
        IndexError: if a row lacks the name, link, head-count, location
            or publish-time value.
    """
    for row in response.xpath("//tr[@class='even']|//tr[@class='odd']"):
        item = TencentItem()
        # Position name
        item['positionName'] = row.xpath('./td[1]/a/text()').extract()[0]
        # Detail link
        item['positionLink'] = row.xpath('./td[1]/a/@href').extract()[0]
        # Position category.  A cell without text made ``extract()[0]``
        # raise IndexError and abort the rest of the page; store ''
        # for such rows instead.
        # NOTE(review): presumably this is the one optional cell - confirm.
        category_texts = row.xpath('./td[2]/text()').extract()
        item['positionType'] = category_texts[0] if category_texts else ''
        # Number of openings
        item['positionNum'] = row.xpath('./td[3]/text()').extract()[0]
        # Work location
        item['workLocation'] = row.xpath('./td[4]/text()').extract()[0]
        # Publish date
        item['publishTime'] = row.xpath('./td[5]/text()').extract()[0]
        yield item
def parseTencent(self, response):
    """Yield one ``TencentItem`` per ``even``/``odd`` table row.

    The position category (second cell) is intentionally not extracted.

    Yields:
        A ``TencentItem`` with ``positionname``, ``positionlink``,
        ``peopleNum``, ``workLocation`` and ``publishTime`` set.

    Raises:
        IndexError: if a row lacks any of the extracted values.
    """
    # (item field, row-relative XPath), in the order they are assigned.
    field_queries = (
        ('positionname', "./td[1]/a/text()"),   # position name
        ('positionlink', "./td[1]/a/@href"),    # detail link
        ('peopleNum', "./td[3]/text()"),        # number of openings
        ('workLocation', "./td[4]/text()"),     # work location
        ('publishTime', "./td[5]/text()"),      # publish date
    )
    table_rows = response.xpath("//tr[@class='even'] | //tr[@class='odd']")
    for row in table_rows:
        # Fresh item object for every row
        item = TencentItem()
        for field, query in field_queries:
            item[field] = row.xpath(query).extract()[0]
        yield item