Пример #1
0
    def parse(self, response):
        """Collect job links from the listing and schedule their detail pages.

        Every ``li`` of the ``link-list`` inside ``div#fragment-2`` becomes a
        ``JobData`` item.  The anchor text is split on ``", "``: the first
        piece is stored (lower-cased) as the title and the last piece as the
        location, so a text without any separator yields the same value for
        both fields.

        Yields one ``Request`` per item, with the item stored under
        ``meta['item']`` and ``self.description_parse`` as callback.  An
        ``IndexError`` propagates if a row has no anchor href or text.
        """
        selector = HtmlXPathSelector(response)
        rows = selector.select(
            "//div[@id='fragment-2']/ul[@class='link-list']/li")

        collected = []
        for node in rows:
            job = JobData()
            job['link'] = node.select("./a/@href").extract()[0]
            pieces = node.select("./a/text()").extract()[0].split(", ")
            # With exactly one separator pieces[1] is also pieces[-1], so a
            # single rule covers every case.
            job['title'] = pieces[0].lower()
            job['location'] = pieces[-1].lower()
            job['source'] = "www.vmp.fi"
            collected.append(job)

        # Requests are created only after every row has been parsed.
        for job in collected:
            yield Request("%s" % job['link'],
                          callback=self.description_parse,
                          meta={'item': job})
Пример #2
0
    def parse(self, response):
        """Build job items from the table rows and request each detail page.

        Every ``tr`` on the page is turned into a ``JobData`` item holding the
        title, link and location taken from the first and sixth cells.  The
        first row is discarded because it comes back empty.  The remaining
        items get a lower-cased title, an absolute link and a
        whitespace-normalised, lower-cased location; when the location cell
        has no text at all, or only whitespace, a placeholder is stored
        instead.

        Yields one ``Request`` per item, with the item attached as
        ``request.meta['item']`` and ``self.description_parse`` as callback.
        An ``IndexError`` still propagates if a remaining row has no title
        anchor.
        """
        hxs = HtmlXPathSelector(response)
        items = []

        for row in hxs.select("//tr"):
            item = JobData()
            item['title'] = row.select("./td[1]/a/text()").extract()
            item['link'] = row.select("./td[1]/a/@href").extract()
            item['location'] = row.select("./td[6]/div/text()").extract()
            # We need to specify where we fetch data from
            item['source'] = "www.staffpoint.fi"
            items.append(item)

        # The first row comes back empty, so drop it before post-processing.
        # Deleting a slice is a no-op on an empty list, so a page without any
        # rows no longer raises IndexError.
        del items[:1]

        for item in items:
            item['title'] = item['title'][0].lower()
            item['link'] = "https://www.staffpoint.fi" + item['link'][0]
            # A cell without any text node yields an empty list; treat that
            # the same as a blank string instead of failing on [0].
            raw_location = item['location'][0] if item['location'] else ""
            location = " ".join(raw_location.split()).lower()
            if location == "":
                location = "find more about location on website"
            item['location'] = location

        # Now fetch the description page of every job.
        for item in items:
            request = Request("%s" % item['link'],
                              callback=self.description_parse)
            request.meta['item'] = item
            yield request
Пример #3
0
    def parse(self, response):
        """Extract job hits from the listing and request their detail pages.

        Each ``article.mp-job-hit`` heading is lower-cased and split on
        ``", "``.  When a separator is present the first piece becomes the
        title and the second piece the location; otherwise the whole heading
        is the title and the location is set to ``"all"``.

        Yields one ``Request`` per item, with the item stored under
        ``meta['item']`` and ``self.description_parse`` as callback.
        """
        selector = HtmlXPathSelector(response)
        articles = selector.select(
            "//div[@class='span9 tablet-span9']/article[@class='mp-job-hit media']"
        )

        collected = []
        for node in articles:
            job = JobData()
            heading = node.select(
                "./div[@class='media-body']/a/h3/text()").extract()[0]
            heading = heading.lower()
            pieces = heading.split(", ")
            if len(pieces) > 1:
                # At least one ", " separator was found.
                job['title'] = pieces[0]
                job['location'] = pieces[1]
            else:
                job['title'] = heading
                job['location'] = "all"

            href = node.select(
                "./div[@class='media-body']/a/@href").extract()[0]
            job['link'] = "https://www.experis.fi" + href
            job['source'] = "www.experis.fi"
            collected.append(job)

        # Requests are created only after every article has been parsed.
        for job in collected:
            yield Request("%s" % job['link'],
                          callback=self.description_parse,
                          meta={'item': job})
Пример #4
0
    def parse(self, response):
        """Read the job feed list and request the detail page of each entry.

        For every ``li`` of ``ul.feed`` the link is the site prefix plus the
        lower-cased href, and the title is the lower-cased bold text.  The
        location comes from the anchor's own first text node: ``", "`` is
        removed, the result is stripped and split on single spaces; a single
        word is used as is, otherwise the second word is used.  A blank text
        is replaced by a two-word placeholder first.

        Yields one ``Request`` per item, with the item stored under
        ``meta['item']`` and ``self.description_parse`` as callback.
        """
        selector = HtmlXPathSelector(response)
        entries = selector.select("//ul[@class='feed']/li")

        collected = []
        for node in entries:
            job = JobData()
            href = node.select("./a/@href").extract()[0]
            job['link'] = "http://www.alrekry.fi/" + href.lower()
            job['title'] = node.select("./a/b/text()").extract()[0].lower()
            job['source'] = "www.alrekry.fi"

            raw_text = node.select("./a/text()").extract()[0]
            cleaned = raw_text.replace(", ", "").strip()
            # An empty string is falsy, so the placeholder takes over.
            words = (cleaned or "unknow unknow").split(" ")
            chosen = words[0] if len(words) == 1 else words[1]
            job['location'] = chosen.lower()
            collected.append(job)

        # Requests are created only after every entry has been parsed.
        for job in collected:
            yield Request("%s" % job['link'],
                          callback=self.description_parse,
                          meta={'item': job})
Пример #5
0
    def parse(self, response):
        """Parse the job-offer result table and request each offer's page.

        For every ``tr`` of the ``jobOfferResults`` table a ``JobData`` item
        is filled with the lower-cased title, the absolute link, the location
        (fourth cell) and ``expire_day``, parsed from the third cell with the
        ``%d.%m.%Y`` format.

        Yields one ``Request`` per item, with the item attached as
        ``request.meta['item']`` and ``self.description_parse`` as callback.

        Raises ``IndexError`` if a row lacks one of the expected nodes and
        ``ValueError`` if the date text does not match the format.
        """
        hxs = HtmlXPathSelector(response)
        items = []
        jobs = hxs.select("//div[@id='jobOfferResults']/article/table/tbody/tr")

        for row in jobs:
            item = JobData()
            item['title'] = row.select("./td[1]/a/strong/text()").extract()[0].lower()
            item['link'] = "https://www.tootukassa.ee" + row.select("./td[1]/a/@href").extract()[0]
            item['location'] = row.select("./td[4]/text()").extract()[0]
            # Use the same selector method as the other lookups, and strip the
            # cell text: strptime rejects surrounding whitespace or newlines.
            expired_day_string = row.select("./td[3]/text()").extract()[0].strip()
            item['expire_day'] = datetime.datetime.strptime(expired_day_string, "%d.%m.%Y")
            items.append(item)

        for item in items:
            request = Request("%s" % item['link'], callback=self.description_parse)
            request.meta['item'] = item
            yield request
Пример #6
0
    def parse(self, response):
        """Turn the rows of the ``datatable`` table into job detail requests.

        Only ``tr`` elements whose ``class`` attribute is the empty string are
        selected.  Each one gives a ``JobData`` item with a lower-cased title,
        the href as found, a lower-cased and stripped location, and
        ``sponsor`` set to 1.

        Yields one ``Request`` per item, with the item stored under
        ``meta['item']`` and ``self.description_parse`` as callback.
        """
        selector = HtmlXPathSelector(response)
        rows = selector.select("//table[@class='datatable']/tbody/tr[@class='']")

        collected = []
        for node in rows:
            job = JobData()
            title_text = node.select("./td[1]/a/text()").extract()[0]
            job['title'] = title_text.lower()
            job['link'] = node.select("./td[1]/a/@href").extract()[0]
            place_text = node.select("./td[2]/text()").extract()[0]
            job['location'] = place_text.lower().strip()
            job['source'] = "www.elektrik24.ee"
            job['sponsor'] = 1
            collected.append(job)

        # Requests are created only after every row has been parsed.
        for job in collected:
            yield Request("%s" % job['link'],
                          callback=self.description_parse,
                          meta={'item': job})
Пример #7
0
    def parse(self, response):
        """Collect job articles from the listing and request their pages.

        Every ``article`` inside the ``span9 tablet-span9`` container gives a
        ``JobData`` item with the lower-cased heading as title and an absolute
        link.  No location is set here.

        Yields one ``Request`` per item, with the item stored under
        ``meta['item']`` and ``self.description_parse`` as callback.
        """
        selector = HtmlXPathSelector(response)
        articles = selector.select("//div[@class='span9 tablet-span9']/article")

        collected = []
        for node in articles:
            job = JobData()
            heading = node.select(
                "./div[@class='media-body']/a/h3/text()").extract()[0]
            job['title'] = heading.lower()
            href = node.select(
                "./div[@class='media-body']/a/@href").extract()[0]
            job['link'] = "https://www.manpower.fi" + href
            job['source'] = "www.manpower.fi"
            collected.append(job)

        # Requests are created only after every article has been parsed.
        for job in collected:
            yield Request("%s" % job['link'],
                          callback=self.description_parse,
                          meta={'item': job})
Пример #8
0
    def parse(self, response):
        """Parse the job table and request the detail page of every job.

        All ``tr`` elements except the first one are converted into
        ``JobData`` items with a lower-cased title, the href as found and a
        lower-cased location taken from the second cell.

        Yields one ``Request`` per item, with the item attached as
        ``request.meta['item']`` and ``self.description_parse`` as callback.
        A page without any ``tr`` yields nothing.
        """
        hxs = HtmlXPathSelector(response)
        items = []
        # Skip the first row.  Slicing (unlike remove(jobs[0])) does not fail
        # with IndexError when the page contains no rows at all.
        jobs = hxs.select("//tr")[1:]

        for row in jobs:
            item = JobData()
            item['title'] = row.select("./td[1]/a/text()").extract()[0].lower()
            item['link'] = row.select("./td[1]/a/@href").extract()[0]
            item['location'] = row.select(
                "./td[2]/text()").extract()[0].lower()
            # We need to specify where we fetch data from
            item['source'] = "www.opteam.fi"
            items.append(item)

        for item in items:
            request = Request("%s" % item['link'],
                              callback=self.description_parse)
            request.meta['item'] = item
            yield request
Пример #9
0
    def parse(self, response):
        """Read the ``jobs-box`` paragraphs and request each job's page.

        The bold anchor text of every paragraph is split on ``" - "``; the
        first piece (lower-cased) is the title and the second piece
        (lower-cased) is the location.  The link is the site prefix followed
        by the anchor's href.

        Yields one ``Request`` per item, with the item stored under
        ``meta['item']`` and ``self.description_parse`` as callback.  An
        ``IndexError`` propagates when the text contains no ``" - "``.
        """
        selector = HtmlXPathSelector(response)
        paragraphs = selector.select("//div[@class='jobs-box']/p")

        collected = []
        for node in paragraphs:
            job = JobData()
            href = node.select("./a/@href").extract()[0]
            job['link'] = "http://www.eilakaisla.fi/avoimet-tyopaikat" + href
            label = node.select("./a/b/text()").extract()[0]
            pieces = label.split(" - ")
            job['title'] = pieces[0].lower()
            job['location'] = pieces[1].lower()
            job['source'] = "www.eilakaisla.fi"
            collected.append(job)

        # Requests are created only after every paragraph has been parsed.
        for job in collected:
            yield Request("%s" % job['link'],
                          callback=self.description_parse,
                          meta={'item': job})
Пример #10
0
    def parse(self, response):
        """Parse the odd/even table rows and request each job's detail page.

        Rows whose ``cell1`` column holds no anchor text are ignored.  For the
        others a ``JobData`` item receives the lower-cased title, an absolute
        link and the lower-cased, stripped text of the ``cell4`` column.

        Yields one ``Request`` per item, with the item stored under
        ``meta['item']`` and ``self.description_parse`` as callback.
        """
        selector = HtmlXPathSelector(response)
        rows = selector.select("//tr[@class='odd' or @class='even']")

        collected = []
        for node in rows:
            job = JobData()
            titles = node.select("./td[@class='cell1']/a/text()").extract()
            if not titles:
                # No job anchor in this row.
                continue
            job['title'] = titles[0].lower()
            href = node.select("./td[@class='cell1']/a/@href").extract()[0]
            job['link'] = "http://www.toopakkumised.com" + href
            place = node.select("./td[@class='cell4']/text()").extract()[0]
            job['location'] = place.lower().strip()
            job['source'] = "toopakkumised.com"
            collected.append(job)

        # Requests are created only after every row has been parsed.
        for job in collected:
            yield Request("%s" % job['link'],
                          callback=self.description_parse,
                          meta={'item': job})
Пример #11
0
    def parse(self, response):
        """Build complete job items straight from the vacancy listing page.

        Every ``div.vacancy`` under ``div#main`` becomes a ``JobData`` item
        with a lower-cased title, a fixed link and location, and ``sponsor``
        set to 1.  The description is the first direct text node of the
        ``vacancy-content`` div, stripped; when that is missing or blank, the
        first ``p`` text inside the div is used, and when that is missing as
        well the description is an empty string.

        Returns the list of items; no follow-up requests are made.
        """
        hxs = HtmlXPathSelector(response)
        items = []
        jobs = hxs.select("//div[@id='main']/div[@class='vacancy']")

        for row in jobs:
            item = JobData()
            item['title'] = row.select("./h3/text()").extract()[0].lower()
            item['link'] = "http://www.meranti.fi/yhteys/avoimet_tyopaikat"
            item['location'] = "oulu"
            # We need to specify where we fetch data from
            item['source'] = "www.meranti.fi"
            item['sponsor'] = 1

            # A div without any direct text node gives an empty list; that
            # must fall through to the <p> lookup instead of raising
            # IndexError before the fallback can run.
            direct_text = row.select(
                "./div[@class='vacancy-content']/text()").extract()
            description = direct_text[0].strip() if direct_text else ""
            if description == "":
                paragraph_text = row.select(
                    "./div[@class='vacancy-content']/p/text()").extract()
                description = (paragraph_text[0].strip()
                               if paragraph_text else "")
            item['description'] = description
            items.append(item)

        return items
Пример #12
0
 def parse(self, response):
     """Build complete job items from the post list on the page.

     All child ``div`` elements of ``div#content`` except the last one are
     converted into ``JobData`` items.  The title and link come from the
     post heading anchor; location and source are fixed values; the
     description is the concatenation of every ``p`` text node of the post;
     ``sponsor`` is set to 1.

     Returns the list of items; no follow-up requests are made.  A page
     without any matching ``div`` returns an empty list.
     """
     hxs = HtmlXPathSelector(response)
     items = []
     # Drop the last div.  Slicing (unlike remove(jobs[-1])) does not fail
     # with IndexError when nothing matched.
     jobs = hxs.select("//div[@id='content']/div")[:-1]
     for row in jobs:
         item = JobData()
         item['title'] = row.select(
             "./div[@class='post-content']/h2/a/text()").extract()[0].lower(
             )
         item['link'] = row.select(
             "./div[@class='post-content']/h2/a/@href").extract()[0]
         item['location'] = "uusima, helsinki, vantaa, espoo"
         item['source'] = "www.siivous10.fi"
         info = row.select(
             "./div[@class='post-content']/p/text()").extract()
         # Join once instead of growing the string piece by piece.
         item['description'] = "".join(info)
         item['sponsor'] = 1
         items.append(item)
     return items
Пример #13
0
    def parse(self, response):
        """Parse the results table and request the detail page of each job.

        Rows of the ``results clickable_multi`` table without anchor text in
        the first cell are ignored.  The others give a ``JobData`` item with a
        lower-cased title, an absolute link and the lower-cased anchor text of
        the fourth cell as location.

        Yields one ``Request`` per item, with the item stored under
        ``meta['item']`` and ``self.description_parse`` as callback.
        """
        selector = HtmlXPathSelector(response)
        rows = selector.select("//table[@class='results clickable_multi']/tr")

        collected = []
        for node in rows:
            job = JobData()
            titles = node.select("./td[1]/a/text()").extract()
            if not titles:
                # No job anchor in this row.
                continue
            job['title'] = titles[0].lower()
            href = node.select("./td[1]/a/@href").extract()[0]
            job['link'] = "https://hrr.rekrytointi.com" + href
            place = node.select("./td[4]/a/text()").extract()[0]
            job['location'] = place.lower()
            # Record which site the data was fetched from.
            job['source'] = "hrr.rekrytointi.com"
            collected.append(job)

        # Requests are created only after every row has been parsed.
        for job in collected:
            yield Request("%s" % job['link'],
                          callback=self.description_parse,
                          meta={'item': job})
Пример #14
0
    def parse(self, response):
        """Parse the job table and request the detail page of every job.

        For each ``tr`` of the table in the sixth ``div`` of ``div#content``
        a ``JobData`` item receives the lower-cased title and the absolute
        link from the first bold anchor of the second cell, the lower-cased
        and stripped location from the second bold element, and
        ``expire_day`` parsed with ``%d.%m.%Y`` from the second text node of
        the fourth cell.

        Yields one ``Request`` per item, with the item stored under
        ``meta['item']`` and ``self.description_parse`` as callback.
        """
        selector = HtmlXPathSelector(response)
        rows = selector.select("//div[@id='content']/div[6]/div/table/tr")

        collected = []
        for node in rows:
            entry = JobData()
            title_text = node.select("./td[2]/b[1]/a/text()").extract()[0]
            entry['title'] = title_text.lower()
            href = node.select("./td[2]/b[1]/a/@href").extract()[0]
            entry['link'] = "http://www.xn--td-fkaa.ee/" + href
            place_text = node.select("./td[2]/b[2]/text()").extract()[0]
            entry['location'] = place_text.lower().strip()
            # The date is the second text node of the cell, not the first.
            date_text = node.select("./td[4]/text()").extract()[1].strip()
            entry['expire_day'] = datetime.datetime.strptime(
                date_text, "%d.%m.%Y")
            collected.append(entry)

        # Requests are created only after every row has been parsed.
        for entry in collected:
            yield Request("%s" % entry['link'],
                          callback=self.description_parse,
                          meta={'item': entry})
Пример #15
0
    def parse(self, response):
        """Parse the job search results and request each job's detail page.

        All rows of the ``job-search-results`` table except the first one
        (presumably the header row) are converted into ``JobData`` items with
        a lower-cased title, the href as found and ``expire_day``, parsed with
        ``%d.%m.%Y`` from the first space-separated token of the second cell.

        Yields one ``Request`` per item, with the item attached as
        ``request.meta['item']`` and ``self.description_parse`` as callback.
        A table without rows yields nothing.

        Raises ``IndexError`` if a row lacks one of the expected nodes and
        ``ValueError`` if the date token does not match the format.
        """
        hxs = HtmlXPathSelector(response)
        items = []
        # Skip the first row.  Slicing (unlike remove(jobs[0])) does not fail
        # with IndexError when the table has no rows.
        jobs = hxs.select("//table[@class='job-search-results']/tbody/tr")[1:]

        for row in jobs:
            item = JobData()
            item['title'] = row.select("./td[1]/a/text()").extract()[0].lower()
            item['link'] = row.select("./td[1]/a/@href").extract()[0]
            # We need to specify where we fetch data from
            item['source'] = "www.sol.fi"
            day_string = row.select("./td[2]/text()").extract()[0]
            # Strip first: leading whitespace would make the first token
            # empty, and a trailing newline would be rejected by strptime.
            day_list = day_string.strip().split(" ")
            item['expire_day'] = datetime.datetime.strptime(
                day_list[0], "%d.%m.%Y")
            items.append(item)

        for item in items:
            request = Request("%s" % item['link'],
                              callback=self.description_parse)
            request.meta['item'] = item
            yield request