def save_job(self, html_doc):
    """Parse a job-posting HTML page, store it as a ``Job``, then save its company.

    The following fields are read from the page and set on a new ``Job``:

    * ``title``    - first text node of ``td.sr_bt``
    * ``detail``   - first text node of a ``div`` directly under the
      job-detail ``td``
    * ``welfare``  - every ``Welfare_label`` span text, added one by one
    * ``date`` / ``location`` / ``salary`` - taken from the ``jobs_1``
      table, where a ``txt_1`` cell holds a label and the ``txt_2`` cell
      at the same position holds its value

    Fields whose markup is absent from the page are simply left unset.
    After the job is saved, the parsed tree is handed to
    ``self.save_company``.

    :param html_doc: HTML source of the job page, as text accepted by
        ``StringIO``.
    """
    job = Job()
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(html_doc), parser)

    # Title: only the first matching text node is used.
    title = tree.xpath("//td[@class='sr_bt']/text()")
    if title:
        job.title = title[0]

    # Job description: only the first matching text node is used.
    job_detail = tree.xpath(
        "//td[contains(@class, 'txt_4 wordBreakNormal job_detail')]/div/text()"
    )
    if job_detail:
        job.detail = job_detail[0]

    # NOTE(review): ``job.welfare`` is presumably a set-like or related
    # collection; it is filled before ``job.save()`` exactly as before.
    for label_text in tree.xpath("//span[contains(@class, 'Welfare_label')]/text()"):
        job.welfare.add(label_text)

    # Date / location / salary: label cells (txt_1) and value cells (txt_2)
    # are matched up by position.
    label_cells = tree.xpath(
        "//table[contains(@class, 'jobs_1')]/tr/td[contains(@class, 'txt_1')]"
    )
    value_cells = tree.xpath(
        "//table[contains(@class, 'jobs_1')]/tr/td[contains(@class, 'txt_2')]"
    )
    # Page label (matched exactly, without stripping) -> Job attribute.
    label_to_attr = {
        '发布日期:': 'date',
        '工作地点:': 'location',
        '薪水范围:': 'salary',
    }
    # zip() stops at the shorter list, so a label without a value cell at
    # the same position can no longer raise IndexError.
    for label_cell, value_cell in zip(label_cells, value_cells):
        label = label_cell.text
        # An empty label ends the scan. A cell with no direct text
        # (``.text`` is None) is treated the same way instead of crashing
        # on ``None.lstrip()``.
        if label is None or not label.lstrip():
            break
        attr = label_to_attr.get(label)
        if attr is not None:
            setattr(job, attr, value_cell.text)

    job.save()
    self.save_company(tree)