예제 #1
0
def main(keyword=None, location=None):
    """Fetch Dice job-search results for ``keyword`` and store new jobs.

    Requests the ``simple.json`` job-search endpoint on service.dice.com and,
    for every entry in the response's ``resultItemList``, creates a
    ``models.Job`` (together with its position and location rows) unless a
    job with the same platform and platform job id is already stored.
    Existing jobs are left untouched.

    Args:
        keyword: Search text. It is percent-encoded before being placed in
            the query string; ``None`` is sent as an empty search text
            instead of the literal string "None".
        location: Accepted but not used by this function.

    Returns:
        ``1`` when the server answers with a status other than ``"200"``,
        otherwise ``None``.
    """
    from urllib.parse import quote

    h = httplib2.Http()
    url = "http://service.dice.com/api/rest/jobsearch/v1/simple.json?text=%s" % quote(keyword or "", safe="")
    resp, data = h.request(url, "GET")
    status = resp.get("status")
    if status != "200":
        log.warning("%s: server error %s" % (__file__, status))
        return 1

    parser_json = json.loads(data.decode("utf-8"))

    # The platform is named after this module's file name and is the same for
    # every item, so it is looked up once instead of once per result.
    platform, _ = models.Platform.objects.get_or_create(name=os.path.basename(__file__).replace(".py", ""))

    # Raw string so the backslash escapes reach the regex engine unchanged.
    # (\S already covers digits and '-'; the pattern is kept as it was.)
    job_id_re = re.compile(r"/result/([\d\S-]+)/")

    # A response without "resultItemList" is treated as "no results".
    for item in parser_json.get("resultItemList", []):
        # update_company runs for every item, including ones skipped below,
        # exactly as before.
        company = update_company(item["company"])

        match = job_id_re.search(item.get("detailUrl") or "")
        if not match:
            log.warning("job id not found %s" % item.get("detailUrl"))
            continue
        job_id = match.group(1)

        try:
            models.Job.objects.get(platform=platform, platform_job_id=job_id)
        except models.Job.DoesNotExist:
            # Position and location rows are only created for jobs that are
            # actually new.
            position, _ = models.Position.objects.get_or_create(name=item["jobTitle"])
            job_location, _ = models.Location.objects.get_or_create(place=item["location"])
            models.Job.objects.create(
                company=company,
                position=position,
                location=job_location,
                platform=platform,
                platform_job_id=job_id,
            )
예제 #2
0
def _split_address(address):
    """Split ``address`` at its last ', ' into ``(street, place)``.

    Everything before the last ', ' is the street, the remainder is the
    place. The street is cut to 100 characters and the place is stripped so
    that the values used for the lookup are the same ones that get stored.
    """
    street, _, place = address.rpartition(', ')
    return street[:100], place.strip()


def _normalize_experience(text):
    """Strip the Vietnamese wording around a years-of-experience value."""
    text = text.replace(' năm', '').replace('Dưới ', '').replace('Hơn ', '')
    return text.replace('Chưa có kinh nghiệm', '0')


def _parse_min_salary(text):
    """Return the lower bound of a '<low> - <high> triệu' range, or ``None``."""
    match = re.search(r'([\d]+) - ([\d]+) triệu$', text)
    return int(match.group(1)) if match else None


def main(keyword=None, location=None):
    """Scrape vieclam24h.vn search results for ``keyword`` and store new jobs.

    Fetches the search page, follows every ``a.text_grey2`` link to its
    detail page, maps the ``p.line-icon`` rows to job fields through
    ``DATA_MAP`` and creates a ``models.Job`` unless one with the same
    platform and platform job id already exists. Items whose title, job id,
    detail page, address or company cannot be read are logged and skipped.

    Args:
        keyword: Search text. It is percent-encoded before being placed in
            the URL; ``None`` is sent as an empty search text.
        location: Accepted but not used by this function.

    Returns:
        ``1`` when the search request answers with a status other than
        ``'200'``, otherwise ``None``.
    """
    from urllib.parse import quote

    h = httplib2.Http(".cache")
    # NOTE(review): the keyword parameter is appended with '&' directly after
    # the path (no '?'). An earlier form of this request, kept below for
    # reference, used a regular query string - confirm which one the site expects.
    #resp, data = h.request('https://vieclam24h.vn/tim-kiem-viec-lam-nhanh/?hdn_nganh_nghe_cap1=&hdn_dia_diem=&hdn_tu_khoa=%s&hdn_hinh_thuc=&hdn_cap_bac=' % keyword, 'GET')
    resp, data = h.request('https://vieclam24h.vn/tim-kiem-viec-lam-nhanh/&hdn_tu_khoa=%s' % quote(keyword or '', safe=''), 'GET')
    if resp.get('status') != '200':
        log.warning('%s: server error %s' % (__file__, resp.get('status')))
        return 1

    country = models.Country.objects.get(iso='VN')
    # The platform is named after this module's file name and is the same for
    # every item, so it is looked up once instead of once per result.
    platform, _ = models.Platform.objects.get_or_create(name=os.path.basename(__file__).replace('.py', ''))

    soup = BeautifulSoup(data, 'html.parser')
    for item in soup.findAll('a', {'class': 'text_grey2'}):
        href = item.get('href')

        # .string is None when the anchor does not wrap exactly one string.
        title = item.string
        if title is None:
            log.warning('job title not found %s' % href)
            continue
        position, _ = models.Position.objects.get_or_create(name=title.strip())

        match = re.search(r'id([\d]+)\.html', href or '')
        if not match:
            log.warning('job id not found %s' % href)
            continue
        job_id = match.group(1)

        detail_resp, detail_data = h.request(href, 'GET')
        if detail_resp.get('status') != '200':
            log.warning('%s: server error %s' % (href, detail_resp.get('status')))
            continue
        details = BeautifulSoup(detail_data, 'html.parser')

        kwargs = {}
        for detail in details.findAll('p', {'class': 'line-icon'}):
            value_tag = detail.find('span', {'class': 'job_value'})
            if value_tag is None:
                value_tag = detail.find('a', {'class': 'job_value'})

            # The first CSS class of the row's <i> icon selects the job field.
            icon = detail.find('i')
            icon_classes = icon.get('class') if icon is not None else None
            field = DATA_MAP.get(icon_classes[0]) if icon_classes else None
            if field is None:
                # Row without an icon or with an unmapped icon: nothing to
                # store (previously collected under a None key and deleted).
                continue
            kwargs[field] = value_tag.string if value_tag is not None else None

        address_tag = details.find('address')
        address = address_tag.string if address_tag is not None else None
        if not address:
            log.warning('job location not found %s' % href)
            continue

        category, _ = models.Category.objects.get_or_create(name=kwargs.pop('category', None) or 'test')
        # NOTE(review): the position is not saved after this assignment, so
        # the category is only set on the in-memory object - confirm intent.
        position.category = category
        kwargs['position'] = position

        street, place = _split_address(address)
        job_location = models.Location.objects.filter(place=place, street=street, country=country).first()
        if job_location is None:
            job_location = models.Location.objects.create(place=place, street=street, country=country)
        kwargs['location'] = job_location

        if kwargs.get('years_of_experience'):
            kwargs['years_of_experience'] = _normalize_experience(kwargs['years_of_experience'])

        company_tag = details.find('h3', {'class': 'font18'})
        if company_tag is None:
            log.warning('company not found %s' % href)
            continue
        kwargs['company'] = update_company(company_tag.string)

        if kwargs.get('salary'):
            # Only the lower bound of the range is kept; salaries in any
            # other format are dropped.
            min_salary = _parse_min_salary(kwargs['salary'])
            if min_salary is None:
                del kwargs['salary']
            else:
                kwargs['salary'] = min_salary

        if kwargs.get('hours_per_week') == 'Toàn thời gian cố định':
            kwargs['hours_per_week'] = 40

        try:
            models.Job.objects.get(platform=platform, platform_job_id=job_id)
        except models.Job.DoesNotExist:
            kwargs['platform'] = platform
            kwargs['platform_job_id'] = job_id
            models.Job.objects.create(**kwargs)