def parse(self, response):
    """Scrape the vmp.fi job list and schedule a description request per job.

    Each entry's anchor text is "title, location". The first
    comma-separated piece is the title and the last piece the location.
    """
    hxs = HtmlXPathSelector(response)
    jobs = hxs.select("//div[@id='fragment-2']/ul[@class='link-list']/li")
    items = []
    for row in jobs:
        item = JobData()
        item['link'] = row.select("./a/@href").extract()[0]
        text = row.select("./a/text()").extract()[0]
        parts = text.split(", ")
        # The original branched on the comma count, but when there is
        # exactly one comma parts[1] == parts[-1], so both branches
        # reduce to this single pair of assignments.  With no comma at
        # all, title == location, exactly as before.
        item['title'] = parts[0].lower()
        item['location'] = parts[-1].lower()
        item['source'] = "www.vmp.fi"
        items.append(item)
    for item in items:
        request = Request(item['link'], callback=self.description_parse)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Scrape the staffpoint.fi job table and schedule description requests.

    Every <tr> is collected first; the leading header row yields empty
    extracts and is dropped by index before post-processing.
    """
    hxs = HtmlXPathSelector(response)
    items = []
    jobs = hxs.select("//tr")
    for row in jobs:
        item = JobData()
        item['title'] = row.select("./td[1]/a/text()").extract()
        item['link'] = row.select("./td[1]/a/@href").extract()
        item['location'] = row.select("./td[6]/div/text()").extract()
        # We need to specify where we fetch data from
        item['source'] = "www.staffpoint.fi"
        items.append(item)
    # The table returns a first empty (header) row: delete it by index.
    # `del` is the right tool here -- list.remove() searches by equality
    # and could match a different-but-equal element.
    del items[0]
    for item in items:
        item['title'] = item['title'][0].lower()
        item['link'] = "https://www.staffpoint.fi" + item['link'][0]
        # Collapse internal whitespace runs and lowercase in one step.
        item['location'] = " ".join(item['location'][0].split()).lower()
        # Check if location is empty or not.
        if item['location'] == "":
            item['location'] = "find more about location on website"
    # Now fetch the description of each job.
    for item in items:
        request = Request(item['link'], callback=self.description_parse)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Collect job hits from the experis.fi results page and hand each
    link to description_parse for the full posting text."""
    hxs = HtmlXPathSelector(response)
    hits = hxs.select(
        "//div[@class='span9 tablet-span9']/article[@class='mp-job-hit media']"
    )
    collected = []
    for hit in hits:
        entry = JobData()
        heading = hit.select(
            "./div[@class='media-body']/a/h3/text()").extract()[0].lower()
        # Headings read "title, location"; with no comma the posting
        # applies to every location.
        if ", " in heading:
            pieces = heading.split(", ")
            entry['title'] = pieces[0]
            entry['location'] = pieces[1]
        else:
            entry['title'] = heading
            entry['location'] = "all"
        entry['link'] = "https://www.experis.fi" + hit.select(
            "./div[@class='media-body']/a/@href").extract()[0]
        entry['source'] = "www.experis.fi"
        collected.append(entry)
    for entry in collected:
        req = Request(entry['link'], callback=self.description_parse)
        req.meta['item'] = entry
        yield req
def parse(self, response):
    """Scrape the alrekry.fi job feed and queue a description request
    for every posting."""
    hxs = HtmlXPathSelector(response)
    collected = []
    for feed_entry in hxs.select("//ul[@class='feed']/li"):
        job = JobData()
        job['link'] = "http://www.alrekry.fi/" + feed_entry.select(
            "./a/@href").extract()[0].lower()
        job['title'] = feed_entry.select("./a/b/text()").extract()[0].lower()
        job['source'] = "www.alrekry.fi"
        raw = feed_entry.select("./a/text()").extract()[0]
        raw = raw.replace(", ", "").strip()
        # An empty location string becomes a two-word placeholder so the
        # word-picking logic below always has something to index.
        if raw == "":
            raw = "unknow unknow"
        words = raw.split(" ")
        # Single word: take it; otherwise the second word is the location.
        job['location'] = (words[0] if len(words) == 1 else words[1]).lower()
        collected.append(job)
    for job in collected:
        req = Request(job['link'], callback=self.description_parse)
        req.meta['item'] = job
        yield req
def parse(self, response):
    """Parse tootukassa.ee job offers (title, link, location, expiry
    date) and schedule a description request for each.

    The expiry column holds dates formatted like "31.12.2015".
    """
    hxs = HtmlXPathSelector(response)
    items = []
    jobs = hxs.select("//div[@id='jobOfferResults']/article/table/tbody/tr")
    for row in jobs:
        item = JobData()
        item['title'] = row.select("./td[1]/a/strong/text()").extract()[0].lower()
        item['link'] = "https://www.tootukassa.ee" + row.select(
            "./td[1]/a/@href").extract()[0]
        item['location'] = row.select("./td[4]/text()").extract()[0]
        # Unified on .select() -- the original mixed .select and .xpath.
        expired_day_string = row.select("./td[3]/text()").extract()[0]
        item['expire_day'] = datetime.datetime.strptime(
            expired_day_string, "%d.%m.%Y")
        # Consistency fix: every other spider records its source site;
        # this one previously omitted it.
        item['source'] = "www.tootukassa.ee"
        items.append(item)
    for item in items:
        request = Request(item['link'], callback=self.description_parse)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Scrape the elektrik24.ee data table; every row becomes a
    sponsored job item whose description is fetched separately."""
    hxs = HtmlXPathSelector(response)
    table_rows = hxs.select("//table[@class='datatable']/tbody/tr[@class='']")
    collected = []
    for table_row in table_rows:
        job = JobData()
        job['title'] = table_row.select("./td[1]/a/text()").extract()[0].lower()
        job['link'] = table_row.select("./td[1]/a/@href").extract()[0]
        job['location'] = table_row.select(
            "./td[2]/text()").extract()[0].lower().strip()
        job['source'] = "www.elektrik24.ee"
        # Postings from this site are flagged as sponsored.
        job['sponsor'] = 1
        collected.append(job)
    for job in collected:
        req = Request(job['link'], callback=self.description_parse)
        req.meta['item'] = job
        yield req
def parse(self, response):
    """Collect job articles from manpower.fi and request each posting's
    description page.  (No location is available on the listing page.)"""
    hxs = HtmlXPathSelector(response)
    articles = hxs.select("//div[@class='span9 tablet-span9']/article")
    collected = []
    for article in articles:
        job = JobData()
        job['title'] = article.select(
            "./div[@class='media-body']/a/h3/text()").extract()[0].lower()
        job['link'] = "https://www.manpower.fi" + article.select(
            "./div[@class='media-body']/a/@href").extract()[0]
        job['source'] = "www.manpower.fi"
        collected.append(job)
    for job in collected:
        req = Request(job['link'], callback=self.description_parse)
        req.meta['item'] = job
        yield req
def parse(self, response):
    """Parse the opteam.fi job table and queue description requests.

    The first <tr> is the table header, so iteration starts at the
    second row.
    """
    hxs = HtmlXPathSelector(response)
    items = []
    # Skip the header row by slicing.  The original called
    # jobs.remove(jobs[0]), which searches by equality and mutates the
    # selector list in place; the slice is direct and side-effect free.
    jobs = hxs.select("//tr")[1:]
    for row in jobs:
        item = JobData()
        item['title'] = row.select("./td[1]/a/text()").extract()[0].lower()
        item['link'] = row.select("./td[1]/a/@href").extract()[0]
        item['location'] = row.select("./td[2]/text()").extract()[0].lower()
        # We need to specify where we fetch data from
        item['source'] = "www.opteam.fi"
        items.append(item)
    for item in items:
        request = Request(item['link'], callback=self.description_parse)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Scrape eilakaisla.fi postings; each link text reads
    "title - location", which is split into the two fields."""
    hxs = HtmlXPathSelector(response)
    collected = []
    for paragraph in hxs.select("//div[@class='jobs-box']/p"):
        job = JobData()
        job['link'] = ("http://www.eilakaisla.fi/avoimet-tyopaikat"
                       + paragraph.select("./a/@href").extract()[0])
        label = paragraph.select("./a/b/text()").extract()[0]
        # "title - location" -- dash with surrounding spaces separates
        # the two parts.
        parts = label.split(" - ")
        job['title'] = parts[0].lower()
        job['location'] = parts[1].lower()
        job['source'] = "www.eilakaisla.fi"
        collected.append(job)
    for job in collected:
        req = Request(job['link'], callback=self.description_parse)
        req.meta['item'] = job
        yield req
def parse(self, response):
    """Scrape toopakkumised.com result rows (odd/even striped table).

    Rows without an anchor in the first cell are separators/headers and
    are skipped entirely.
    """
    hxs = HtmlXPathSelector(response)
    collected = []
    for stripe in hxs.select("//tr[@class='odd' or @class='even']"):
        title_texts = stripe.select("./td[@class='cell1']/a/text()").extract()
        # Guard: only rows that actually carry a job link are kept.
        if not title_texts:
            continue
        job = JobData()
        job['title'] = title_texts[0].lower()
        job['link'] = "http://www.toopakkumised.com" + stripe.select(
            "./td[@class='cell1']/a/@href").extract()[0]
        job['location'] = stripe.select(
            "./td[@class='cell4']/text()").extract()[0].lower().strip()
        job['source'] = "toopakkumised.com"
        collected.append(job)
    for job in collected:
        req = Request(job['link'], callback=self.description_parse)
        req.meta['item'] = job
        yield req
def parse(self, response):
    """Scrape meranti.fi vacancies.  The description lives on the same
    page, so finished items are returned directly (no follow-up
    requests); link and location are fixed site-wide values."""
    hxs = HtmlXPathSelector(response)
    collected = []
    for vacancy in hxs.select("//div[@id='main']/div[@class='vacancy']"):
        job = JobData()
        job['title'] = vacancy.select("./h3/text()").extract()[0].lower()
        job['link'] = "http://www.meranti.fi/yhteys/avoimet_tyopaikat"
        job['location'] = "oulu"
        # We need to specify where we fetch data from
        job['source'] = "www.meranti.fi"
        job['sponsor'] = 1
        job['description'] = vacancy.select(
            "./div[@class='vacancy-content']/text()").extract()[0].strip()
        # Some vacancies wrap the text in a <p>; fall back to that when
        # the direct text node is empty.
        if job['description'] == "":
            job['description'] = vacancy.select(
                "./div[@class='vacancy-content']/p/text()").extract()[0].strip()
        collected.append(job)
    return collected
def parse(self, response):
    """Scrape siivous10.fi blog-style postings.

    Descriptions come from the listing page itself, so complete items
    are returned directly instead of scheduling follow-up requests.
    Link and location are fixed per-site values.
    """
    hxs = HtmlXPathSelector(response)
    items = []
    jobs = hxs.select("//div[@id='content']/div")
    # The last div under #content is not a posting: drop it by index.
    # (del replaces the original jobs.remove(jobs[-1]), which searched
    # by equality from the front of the list.)
    del jobs[-1]
    for row in jobs:
        item = JobData()
        item['title'] = row.select(
            "./div[@class='post-content']/h2/a/text()").extract()[0].lower()
        item['link'] = row.select(
            "./div[@class='post-content']/h2/a/@href").extract()[0]
        item['location'] = "uusima, helsinki, vantaa, espoo"
        item['source'] = "www.siivous10.fi"
        # Join all paragraph fragments in one pass instead of the
        # original repeated `+=` concatenation.
        item['description'] = "".join(row.select(
            "./div[@class='post-content']/p/text()").extract())
        item['sponsor'] = 1
        items.append(item)
    return items
def parse(self, response):
    """Scrape the hrr.rekrytointi.com results table; rows without a job
    anchor in the first cell are skipped."""
    hxs = HtmlXPathSelector(response)
    collected = []
    for result_row in hxs.select("//table[@class='results clickable_multi']/tr"):
        anchor_texts = result_row.select("./td[1]/a/text()").extract()
        # Header/filler rows have no anchor text: ignore them.
        if not anchor_texts:
            continue
        job = JobData()
        job['title'] = anchor_texts[0].lower()
        job['link'] = "https://hrr.rekrytointi.com" + result_row.select(
            "./td[1]/a/@href").extract()[0]
        job['location'] = result_row.select(
            "./td[4]/a/text()").extract()[0].lower()
        # We need to specify where we fetch data from
        job['source'] = "hrr.rekrytointi.com"
        collected.append(job)
    for job in collected:
        req = Request(job['link'], callback=self.description_parse)
        req.meta['item'] = job
        yield req
def parse(self, response):
    """Scrape the töö.ee (xn--td-fkaa.ee) job table, including the
    expiry date parsed from the fourth cell ("dd.mm.YYYY")."""
    hxs = HtmlXPathSelector(response)
    collected = []
    for table_row in hxs.select("//div[@id='content']/div[6]/div/table/tr"):
        job = JobData()
        job['title'] = table_row.select(
            "./td[2]/b[1]/a/text()").extract()[0].lower()
        job['link'] = "http://www.xn--td-fkaa.ee/" + table_row.select(
            "./td[2]/b[1]/a/@href").extract()[0]
        job['location'] = table_row.select(
            "./td[2]/b[2]/text()").extract()[0].lower().strip()
        # The second text node of td[4] carries the expiry date.
        raw_expiry = table_row.select("./td[4]/text()").extract()[1].strip()
        job['expire_day'] = datetime.datetime.strptime(raw_expiry, "%d.%m.%Y")
        collected.append(job)
    for job in collected:
        req = Request(job['link'], callback=self.description_parse)
        req.meta['item'] = job
        yield req
def parse(self, response):
    """Parse the sol.fi job-search results table and schedule
    description requests.

    The first <tr> is the table header and is skipped; the expiry date
    is the first space-separated token of the second cell, formatted
    "dd.mm.YYYY".
    """
    hxs = HtmlXPathSelector(response)
    items = []
    # Skip the header row by slicing.  The original used
    # jobs.remove(jobs[0]), which searches by equality and mutates the
    # selector list in place; the slice is direct and side-effect free.
    jobs = hxs.select("//table[@class='job-search-results']/tbody/tr")[1:]
    for row in jobs:
        item = JobData()
        item['title'] = row.select("./td[1]/a/text()").extract()[0].lower()
        item['link'] = row.select("./td[1]/a/@href").extract()[0]
        # We need to specify where we fetch data from
        item['source'] = "www.sol.fi"
        day_string = row.select("./td[2]/text()").extract()[0]
        day_list = day_string.split(" ")
        item['expire_day'] = datetime.datetime.strptime(
            day_list[0], "%d.%m.%Y")
        items.append(item)
    for item in items:
        request = Request(item['link'], callback=self.description_parse)
        request.meta['item'] = item
        yield request