示例#1
0
    def save_job(self, html_doc):
        """Parse a job-posting HTML page into a ``Job``, save it, then save its company.

        The title, detail text, welfare labels and the labelled
        date / location / salary cells are read from ``html_doc`` with XPath
        and stored on a freshly created ``Job``, after which ``job.save()``
        is called. Fields whose nodes are absent from the page are simply not
        assigned. Finally the already-parsed tree is passed to
        ``self.save_company``.

        :param html_doc: HTML source of the job page, as a string.
        """
        job = Job()

        parser = etree.HTMLParser()
        tree = etree.parse(StringIO(html_doc), parser)

        # Title: first matching text node, if the page has one.
        title = tree.xpath("//td[@class='sr_bt']/text()")
        if title:
            job.title = title[0]

        # Job detail: first text node of the detail cell's <div>, if present.
        job_detail = tree.xpath(
            "//td[contains(@class, 'txt_4 wordBreakNormal job_detail')]/div/text()"
        )
        if job_detail:
            job.detail = job_detail[0]

        # Welfare labels: every label found is added to the job.
        welfare = tree.xpath("//span[contains(@class, 'Welfare_label')]/text()")
        for label in welfare:
            job.welfare.add(label)

        # Date / location / salary: label cells (txt_1) are paired
        # positionally with value cells (txt_2) of the same table.
        label_cells = tree.xpath(
            "//table[contains(@class, 'jobs_1')]/tr/td[contains(@class, 'txt_1')]"
        )
        value_cells = tree.xpath(
            "//table[contains(@class, 'jobs_1')]/tr/td[contains(@class, 'txt_2')]"
        )

        # Exact label text -> name of the Job attribute receiving the value.
        field_by_label = {
            '发布日期:': 'date',
            '工作地点:': 'location',
            '薪水范围:': 'salary',
        }
        # zip() stops at the shorter list, so a missing value cell can no
        # longer raise IndexError.
        for label_cell, value_cell in zip(label_cells, value_cells):
            label_text = label_cell.text
            # A blank label stops the scan (as before). lxml reports a cell
            # with no leading text as None, which is treated as blank too
            # instead of crashing on None.lstrip().
            if label_text is None or not label_text.lstrip():
                break
            field = field_by_label.get(label_text)
            if field is not None:
                setattr(job, field, value_cell.text)

        job.save()
        # Hand over the parsed tree rather than the raw HTML
        # (presumably so save_company does not have to parse it again).
        self.save_company(tree)