Пример #1
0
    def parse1(self, response):
        """Scrape one job-detail page into a ``LagouItem`` and follow the next link.

        Fills ``title``, ``companyname``, ``tag``, ``jobdetail`` and ``url`` on
        every page.  ``day``, ``place``, ``cnt``, ``jobyear``, ``xueli`` and
        ``money`` are set only when the matching label cell is found in the
        ``jobs_1`` table; ``fuli`` is set only when welfare labels exist.

        Yields:
            The populated item, then -- when the page exposes at least three
            links with the pager style -- a ``Request`` for the third one,
            handled by ``self.parse``.

        Raises:
            IndexError: if the title or company-name XPath matches too few
                nodes (these two fields are treated as mandatory).
        """
        x = HtmlXPathSelector(response)
        item = LagouItem()
        item['title'] = x.xpath("//dt[@class='clearfix']/h1/text()").extract()[2]
        item['companyname'] = x.xpath("//div[@class='s_txt_jobs']//a[@style='font-size:14px;font-weight:bold;color:#000000;']/text()").extract()[0]
        item['tag'] = x.xpath("//div[@class='s_txt_jobs']//div[@class='jobdetail_divRight_span']").extract()

        # The summary table stores "label" cells (txt_1) and "value" cells
        # (txt_2) separately; they are paired positionally.
        labels = x.xpath("//div[@class='s_txt_jobs']//table[@class='jobs_1']//td[@class='txt_1']/text()").extract()
        values = x.xpath("//div[@class='s_txt_jobs']//table[@class='jobs_1']//td[@class='txt_2 ']/text()").extract()

        field_by_label = {
            u'发布日期:': 'day',
            u'工作地点:': 'place',
            u'招聘人数:': 'cnt',
            u'工作年限:': 'jobyear',
            u'学历:': 'xueli',
            u'薪水范围:': 'money',
        }
        for label, value in zip(labels, values):
            # Labels may be padded with non-breaking spaces; strip them before
            # the lookup so padded and unpadded variants both match.
            field = field_by_label.get(label.replace(u'\xa0', u''))
            if field is not None:
                item[field] = value

        # Join the extracted <div> fragments themselves.  Joining a single
        # extracted string would insert a tab between every character.
        detail_parts = x.xpath("//td[@class='txt_4 wordBreakNormal job_detail ']/div").extract()
        item['jobdetail'] = '\t'.join(detail_parts)

        fu = x.xpath("//div[@class='s_txt_jobs']//div[@class='jobdetail_divRight_span']/span[@class='Welfare_label']/text()").extract()
        if fu:
            item['fuli'] = fu

        item['url'] = response.url
        # Emit the item before touching pagination so a page without a next
        # link still produces its item.
        yield item

        next_links = x.xpath("//a[@style='border:0px; width:auto;margin-left:5px;']/@href").extract()
        if len(next_links) > 2:
            # The request must be yielded for the crawl to continue.
            yield Request(next_links[2], callback=self.parse)
Пример #2
0
 def parse(self, response):
     """Build one ``DmozItem`` per node matched by ``//fieldset/ul/li``.

     For each matched ``<li>`` the item receives the extracted lists for
     ``title`` (``a/text()``), ``link`` (``a/@href``) and ``desc``
     (``text()``).

     Returns:
         A list of the populated items, in document order; empty when
         nothing matches.
     """
     hxs = HtmlXPathSelector(response)
     items = []
     # Selectors are queried with ``xpath``; they have no ``path`` method.
     for site in hxs.xpath('//fieldset/ul/li'):
         item = DmozItem()
         item['title'] = site.xpath('a/text()').extract()
         item['link'] = site.xpath('a/@href').extract()
         item['desc'] = site.xpath('text()').extract()
         items.append(item)
     return items
Пример #3
0
 def parse(self,response):
     """Build one ``x3cn_Item`` per link inside the post body cell.

     The links are the ``<a>`` children of the ``<td>`` whose id is
     ``postmessage_6511893``.  Each item receives the extracted lists for
     ``title`` (``text()``) and ``link`` (``@href``).

     Returns:
         A list of the populated items, in document order; empty when
         nothing matches.
     """
     hxs = HtmlXPathSelector(response)
     items = []
     # Selectors are queried with ``xpath``; they have no ``path`` method.
     for site in hxs.xpath("//td[@id='postmessage_6511893']/a"):
         item = x3cn_Item()
         item['title'] = site.xpath("text()").extract()
         item['link'] = site.xpath("@href").extract()
         items.append(item)
     return items
Пример #4
0
 def parse(self, response):
     """Collect the links of the post body cell as ``x3cn_Item`` objects.

     Every ``<a>`` directly under the ``<td>`` with id
     ``postmessage_6511893`` yields one item whose ``title`` is the
     extracted ``text()`` list and whose ``link`` is the extracted
     ``@href`` list.

     Returns:
         A list of the populated items, in document order; empty when
         nothing matches.
     """
     hxs = HtmlXPathSelector(response)
     items = []
     # Selectors are queried with ``xpath``; they have no ``path`` method.
     for anchor in hxs.xpath("//td[@id='postmessage_6511893']/a"):
         item = x3cn_Item()
         item['title'] = anchor.xpath("text()").extract()
         item['link'] = anchor.xpath("@href").extract()
         items.append(item)
     return items