def parse_detail_pages(self, response):
    """Parse a Lever job-posting page into a list of JobItem objects.

    :param response: scrapy Response for a lever.co posting page.
    :returns: list of populated JobItem instances (one per matched
        posting container on the page).
    """
    hxs = Selector(response)
    jobs = hxs.xpath('//div[contains(@class, "content")]')
    items = []
    for job in jobs:
        item = JobItem()
        # BUG FIX: the original queries started with '//', which in
        # Scrapy/lxml searches the *whole document*, not the current
        # `job` node — so every item repeated the first posting's data.
        # './/' scopes each query to this job's subtree.
        item["title"] = job.xpath(
            './/div[contains(@class, "posting-headline")]/h2/text()'
        ).extract_first()
        item["company"] = job.xpath(
            './/div[contains(@class, "main-footer-text page-centered")]/p/a/text()'
        ).extract()
        item["company_url"] = job.xpath(
            './/div[contains(@class, "main-footer-text page-centered")]/p/a/@href'
        ).extract()
        item["body"] = job.xpath(
            './/div[contains(@class, "section page-centered")]').extract()
        item["location"] = job.xpath(
            './/div[contains(@class, "sort-by-time posting-category medium-category-label")]'
        ).extract_first()
        item["url"] = response.request.url
        # Fields this board does not expose on the page.
        item["pub_date"] = 'n/a'
        item["email"] = 'n/a'
        item["salary"] = 'n/a'
        item["scrape_date"] = timezone.now()
        item["job_board"] = "Lever"
        item["board_url"] = "lever.co"
        items.append(item)
    return items
def parse_node(self, response, node):
    """Build a JobItem from a single Remote Python RSS <item> node."""
    item = JobItem()
    # Values scraped directly from the feed node.
    item['title'] = node.xpath('title/text()').extract_first()
    item['url'] = node.xpath('link/text()').extract_first()
    item['body'] = node.xpath('description/text()').extract()
    # Board metadata plus fields this feed does not provide.
    fixed_fields = {
        'company': 'Remote Python',
        'pub_date': 'n/a',
        'job_board': 'Remote Python',
        'board_url': 'www.remotepython.com',
        'email': 'n/a',
        'salary': 'n/a',
        'location': 'n/a',
    }
    for field_name, field_value in fixed_fields.items():
        item[field_name] = field_value
    item['scrape_date'] = timezone.now()
    return item
def parse_node(self, response, node):
    """Build a JobItem from a single Indeed RSS <item> node."""
    def first(path):
        # First matching text node under `node`, or None.
        return node.xpath(path).extract_first()

    item = JobItem()
    item['title'] = first('title/text()')
    item['company'] = first('source/text()')
    item['body'] = node.xpath('description/text()').extract()
    item['pub_date'] = first('pubDate/text()')
    item['url'] = first('link/text()')
    item["scrape_date"] = timezone.now()
    item["job_board"] = "Indeed"
    item["board_url"] = "www.indeed.com"
    # The feed carries no email/salary/location fields.
    item["email"] = 'n/a'
    item["salary"] = 'n/a'
    item['location'] = 'n/a'
    return item
def parse_item(self, response):
    """Yield one JobItem scraped from a single posting page.

    Board identity comes from instance attributes; all other fields
    are pulled from the page's fixed HTML layout.
    """
    job = JobItem()
    # Request/board metadata.
    job['url'] = response.url
    job['scrape_date'] = timezone.now()
    job['board_title'] = self.board_title
    job['board_url'] = self.board_url
    # Page content, addressed by absolute position in the document.
    job['title'] = response.xpath('/html/head/title/text()').extract()
    job['body'] = response.xpath('/html/body/blockquote/font').extract()
    job['pub_date'] = response.xpath(
        '(/html/body/p)[2]/strong/br/following-sibling::text()'
    ).extract()
    job['org_title'] = response.xpath(
        '(/html/body/p)[2]/strong/big/text()'
    ).extract()
    job['org_email'] = response.xpath(
        '(/html/body/p)[2]/strong/a/text()'
    ).extract()
    yield job
def parse_node(self, response, node):
    """Build a JobItem from a single Django Gigs RSS <item> node."""
    # Values taken from the feed node itself.
    feed_values = {
        'title': node.xpath('title/text()').extract_first(),
        'body': node.xpath('description/text()').extract(),
        'pub_date': node.xpath('pubDate/text()').extract_first(),
        'url': node.xpath('link/text()').extract_first(),
    }
    item = JobItem()
    for field_name, field_value in feed_values.items():
        item[field_name] = field_value
    # Board metadata and fields the feed does not supply.
    item['company'] = 'Django Gigs'
    item['job_board'] = "Django Gigs"
    item['board_url'] = "www.djangogigs.com"
    item['email'] = 'n/a'
    item['salary'] = 'n/a'
    item['location'] = 'n/a'
    item['scrape_date'] = timezone.now()
    return item
def parse_node(self, response, node):
    """Build a JobItem from a single Stack Overflow jobs RSS <item> node.

    :param response: scrapy Response for the feed (unused here).
    :param node: selector positioned on one <item> element.
    :returns: populated JobItem; fields the feed lacks are 'n/a'.
    """
    item = JobItem()
    # Titles appear to embed extra text after '(' — keep only the part
    # before it. BUG FIX: extract_first() returns None when <title> is
    # missing, and the original unconditionally called .split() on it,
    # raising AttributeError; guard the None case.
    raw_title = node.xpath('title/text()').extract_first()
    item['title'] = raw_title.split('(', 1)[0] if raw_title else raw_title
    item['company'] = node.xpath('name/text()').extract_first()
    item['body'] = node.xpath('description/text()').extract()
    item['pub_date'] = node.xpath('pubDate/text()').extract_first()
    item['url'] = node.xpath('link/text()').extract_first()
    item["scrape_date"] = timezone.now()
    item["job_board"] = "Stack Overflow"
    item["board_url"] = "www.stackoverflow.com"
    item["email"] = 'n/a'
    item["salary"] = 'n/a'
    # One query with a default replaces the original test-then-extract
    # double lookup of location/text().
    item['location'] = node.xpath('location/text()').extract_first(
        default='n/a')
    return item
def parse_detail_pages(self, response):
    """Parse a Workable job-posting page into a list of JobItem objects.

    :param response: scrapy Response for a workable.com posting page.
    :returns: list of populated JobItem instances.
    """
    hxs = Selector(response)
    jobs = hxs.xpath('//main[contains(@class, "stacked")]')
    items = []
    for job in jobs:
        item = JobItem()
        # Title/company are not extractable from this layout.
        item["title"] = 'n/a'
        item["company"] = 'n/a'
        # BUG FIX: '//' queries searched the whole document instead of
        # the current `job` node; './/' scopes them to this job.
        item["body"] = job.xpath(
            './/main[contains(@class, "stacked")]').extract()
        item["location"] = job.xpath(
            './/p[contains(@class, "meta")]').extract_first()
        item["url"] = response.request.url
        item["pub_date"] = 'n/a'
        item["email"] = 'n/a'
        item["salary"] = 'n/a'
        item["scrape_date"] = timezone.now()
        item["job_board"] = "Workable"
        item["board_url"] = "www.workable.com"
        items.append(item)
    return items
def parse_detail_pages(self, response):
    """Parse a Recruiter Box job-posting page into a list of JobItem objects.

    :param response: scrapy Response for a recruiterbox.com posting page.
    :returns: list of populated JobItem instances.
    """
    hxs = Selector(response)
    jobs = hxs.xpath('//div[contains(@id, "content")]')
    items = []
    for job in jobs:
        item = JobItem()
        # BUG FIX: '//' queries searched the whole document instead of
        # the current `job` node; './/' scopes them to this job.
        item["title"] = job.xpath(
            './/h1[contains(@class, "jobtitle")]/text()').extract_first()
        item["company"] = 'n/a'
        # NOTE(review): "jobdesciption" is presumably the site's own
        # misspelled class name — left as-is; verify against live markup.
        item["body"] = job.xpath(
            './/div[contains(@class, "jobdesciption")]').extract()
        item["location"] = job.xpath(
            './/span[contains(@class, "meta-job-location-city")]'
        ).extract()
        item["url"] = response.request.url
        item["pub_date"] = 'n/a'
        item["email"] = 'n/a'
        item["salary"] = 'n/a'
        item["scrape_date"] = timezone.now()
        item["job_board"] = "Recruiter Box"
        item["board_url"] = "www.recruiterbox.com"
        items.append(item)
    return items
def parse_detail_pages(self, response):
    """Parse a Greenhouse job-posting page into a list of JobItem objects.

    :param response: scrapy Response for a greenhouse.io posting page.
    :returns: list of populated JobItem instances.
    """
    hxs = Selector(response)
    jobs = hxs.xpath('//div[contains(@id, "app_body")]')
    items = []
    for job in jobs:
        item = JobItem()
        # BUG FIX: '//' queries searched the whole document instead of
        # the current `job` node; './/' scopes them to this job.
        item["title"] = job.xpath(
            './/h1[contains(@class, "app-title")]/text()').extract_first()
        item["company"] = job.xpath(
            './/span[contains(@class, "company-name")]/text()'
        ).extract_first()
        item["body"] = job.xpath(
            './/div[contains(@id, "content")]').extract()
        item["location"] = job.xpath(
            './/div[contains(@class, "location")]').extract_first()
        item["url"] = response.request.url
        item["pub_date"] = 'n/a'
        item["email"] = 'n/a'
        item["salary"] = 'n/a'
        item["scrape_date"] = timezone.now()
        item["job_board"] = "Greenhouse"
        item["board_url"] = "www.greenhouse.io"
        items.append(item)
    return items