def run(url):
    """Scrape job listings from a CATS (catsone) job grid and insert them.

    Args:
        url: Listing-page URL to scrape.

    Returns:
        Number of rows successfully inserted via job_insert().
    """
    soup = get_soup(url)

    # The first JobGrid-* div holds all of the job anchor entries.
    jobs_list = soup.select('div[class*="JobGrid-"]')[0]
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in jobs_list.find_all('a'):
        job_class.info_link = 'https://path.catsone.com' + job_entry['href']
        job_row = job_entry.find('div', {'class': 'row'})
        job_divs = job_row.find_all('div')
        job_class.title = job_divs[0].text.strip()
        job_class.location = clean_location(job_divs[2].text.strip())
        job_class.zip_code = city_to_zip(job_class.location)
        insert_count += job_insert(job_class)
        # NOTE: more detail (employment type, salary) is available by scraping
        # each job's own page, but those listings are too inconsistently
        # formatted for a reliable per-listing scraper, so only the grid
        # data is used here.
    return insert_count
# Example #2
def run(url):
    """Scrape every 'list-data' job card on the page and insert each job.

    Returns:
        Number of rows successfully inserted via job_insert().
    """
    soup = get_soup(url)

    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in soup.find_all('div', {'class': 'list-data'}):
        job_info = job_entry.find('div', {'class': 'job-info'})
        job_class.title = job_info.find('span', {'class': 'job-title'}).text.strip()
        job_class.info_link = job_info.h4.a['href']
        job_class.full_or_part = job_entry.find('div', {'class': 'job-type'}).text.strip()
        raw_location = job_entry.find('div', {'class': 'job-location'}).text.strip()
        job_class.location = clean_location(raw_location)
        job_class.zip_code = city_to_zip(job_class.location)
        # Posting date is relative text; assumes word [1] is the count and
        # word [2] is the unit (e.g. "... 3 days ...") — TODO confirm format.
        date_words = job_entry.find('div', {'class': 'job-date'}).text.strip().split(' ')
        job_class.post_date = date_ago(int(date_words[1]), date_words[2])
        job_class.summary = job_entry.find('div', {'class': 'job-description'}).p.text.strip()
        insert_count += job_insert(job_class)
    return insert_count
# Example #3
def run(url):
    """Scrape the job-result table, visiting each listing page for detail.

    Returns:
        Number of rows successfully inserted via job_insert().
    """
    soup = get_soup(url)
    results_table = soup.find('table', {'id': 'job-result-table'})
    job_class = Job(organization, "")
    job_class.post_date = ""
    job_class.organization_id = organization_id
    insert_count = 0
    for result_row in results_table.find_all('tr', {'class': 'job-result'}):
        title_cell = result_row.find('td', {'class': 'job-result-title-cell'})
        job_class.title = title_cell.a.text.strip()
        job_class.info_link = ('https://pennylanecenters.jobs.net'
                               + title_cell.a['href'])
        location_text = result_row.find('div', {'class': 'job-location-line'}).text
        job_class.location = clean_location(location_text)
        job_class.zip_code = city_to_zip(job_class.location)
        # Employment type and post date only appear on the listing's own page.
        detail_soup = get_soup(job_class.info_link)
        job_class.full_or_part = detail_soup.find(
            'li', {'class': 'job-employee-type'}).find(
            'div', {'class': 'secondary-text-color'}).text
        job_class.post_date = string_to_date(
            detail_soup.find('li', {'class': 'job-date-posted'}).find(
                'div', {'class': 'secondary-text-color'}).text)
        insert_count += job_insert(job_class)
    return insert_count
# Example #4
def run(url):
    """Scrape the srJobList table (JS-rendered) and update the DB per row."""
    soup = get_javascript_soup(url)
    # [1:] skips the table's header row.
    jobs_list = soup.find('table', {'class': 'srJobList'}).tbody.find_all('tr')[1:]

    for job_entry in jobs_list:
        globals.job_title = job_entry.find('td', {'class': 'srJobListJobTitle'}).text.strip()
        # Slice the URL out of the row's onclick handler — presumably of the
        # form window.open('<url>'); (13 leading / 3 trailing chars). Verify
        # against the live markup if this breaks.
        on_click = job_entry['onclick']
        globals.info_link = on_click[13:-3]
        globals.full_or_part = job_entry.find('td', {'class': 'srJobListTypeOfEmployment'}).text
        globals.job_location = clean_location(job_entry.find('td', {'class': 'srJobListLocation'}).text)
        globals.job_zip_code = city_to_zip(globals.job_location)
        update_db(organization)
def run(url):
    """Scrape the CATS (catsone) job grid and update the DB via globals."""
    soup = get_soup(url)

    # First JobGrid-* div contains all the job anchors.
    grid = soup.select('div[class*="JobGrid-"]')[0]

    for anchor in grid.find_all('a'):
        globals.info_link = 'https://path.catsone.com' + anchor['href']
        cells = anchor.find('div', {'class': 'row'}).find_all('div')
        globals.job_title = cells[0].text.strip()
        globals.job_location = clean_location(cells[2].text.strip())
        globals.job_zip_code = city_to_zip(globals.job_location)
        update_db(organization)
# Example #6
def run(url):
    """Scrape paginated governmentjobs.com listings, updating the DB per row.

    Follows the "next" pager link until it is marked disabled.
    """
    globals.job_post_date = ''
    next_page_url = url
    soup = get_javascript_soup_delayed(next_page_url, 'job-table-title')

    while soup:
        job_table = soup.find('tbody')
        for job_row in job_table.find_all('tr'):
            title_cell = job_row.find('td', {'class': 'job-table-title'})
            globals.job_title = title_cell.a.text.strip()
            globals.info_link = ('https://www.governmentjobs.com'
                                 + title_cell.a['href'])
            globals.salary = job_row.find('td', {'class': 'job-table-salary'}).text
            globals.full_or_part = job_row.find('td', {'class': 'job-table-type'}).text
            # Location and summary only appear on the listing's own page.
            job_soup = get_soup(globals.info_link)
            info_container = job_soup.find('div', {'class': 'summary container'})
            globals.job_location = clean_location(
                info_container.find('div', {'id': 'location-label-id'})
                .parent.find_all('div')[2].text)
            globals.job_zip_code = city_to_zip(globals.job_location)
            globals.job_summary = job_soup.find('div', {'id': 'details-info'}).find('p').text
            update_db(organization)
            reset_vars()
        # Advance while the "next" pager item is not disabled.
        next_item = soup.find('li', {'class': 'PagedList-skipToNext'})
        if 'disabled' not in next_item.get("class"):
            next_page_url = ('https://www.governmentjobs.com/careers/lahsa?'
                             + next_item.a['href'].split('?')[1])
            soup = get_javascript_soup_delayed(next_page_url, 'job-table-title')
        else:
            soup = False
def run(url):
    """Scrape the job-result table and update the DB row by row."""
    globals.job_post_date = ''
    soup = get_soup(url)
    jobs_table = soup.find('table', {'id': 'job-result-table'})

    for job_row in jobs_table.find_all('tr', {'class': 'job-result'}):
        title_cell = job_row.find('td', {'class': 'job-result-title-cell'})
        globals.job_title = title_cell.a.text.strip()
        globals.info_link = ('https://pennylanecenters.jobs.net'
                             + title_cell.a['href'])
        # The info link itself is stored as the summary for this source.
        globals.job_summary = globals.info_link
        location_text = job_row.find('div', {'class': 'job-location-line'}).text
        globals.job_location = clean_location(location_text)
        globals.job_zip_code = city_to_zip(globals.job_location)
        # Employment type and post date live on the listing's own page.
        detail_soup = get_soup(globals.info_link)
        globals.full_or_part = detail_soup.find(
            'li', {'class': 'job-employee-type'}).find(
            'div', {'class': 'secondary-text-color'}).text
        globals.job_post_date = string_to_date(detail_soup.find(
            'li', {'class': 'job-date-posted'}).find(
            'div', {'class': 'secondary-text-color'}).text)
        update_db(organization)
        reset_vars()
def run(url):
    """Scrape paginated governmentjobs.com listings and insert each job.

    Follows the "next" pager link until it is marked disabled.

    Returns:
        Number of rows successfully inserted via job_insert().
    """
    next_page_url = url
    soup = get_javascript_soup_delayed(next_page_url, 'job-table-title')
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    while soup:
        job_table = soup.find('tbody')
        for job_row in job_table.find_all('tr'):
            title_cell = job_row.find('td', {'class': 'job-table-title'})
            job_class.title = title_cell.a.text.strip()
            job_class.info_link = ('https://www.governmentjobs.com'
                                   + title_cell.a['href'])
            job_class.salary = job_row.find('td', {'class': 'job-table-salary'}).text
            job_class.full_or_part = job_row.find('td', {'class': 'job-table-type'}).text
            # Location and summary only appear on the listing's own page.
            job_soup = get_soup(job_class.info_link)
            info_container = job_soup.find('div', {'class': 'summary container'})
            job_class.location = clean_location(
                info_container.find('div', {'id': 'location-label-id'})
                .parent.find_all('div')[2].text)
            job_class.zip_code = city_to_zip(job_class.location)
            job_class.summary = job_soup.find('div', {'id': 'details-info'}).find('p').text
            insert_count += job_insert(job_class)
        # Advance while the "next" pager item is not disabled.
        next_item = soup.find('li', {'class': 'PagedList-skipToNext'})
        if 'disabled' not in next_item.get("class"):
            next_page_url = ('https://www.governmentjobs.com/careers/lahsa?'
                             + next_item.a['href'].split('?')[1])
            soup = get_javascript_soup_delayed(next_page_url, 'job-table-title')
        else:
            soup = False
    return insert_count
def run(url):
    """Scrape the cws-search-results table (JS-rendered) and insert each job.

    Returns:
        Number of rows successfully inserted via job_insert().
    """
    soup = get_javascript_soup(url)

    # [1:] skips the table's header row.
    jobs_list = soup.find(
        'table', {'id': 'cws-search-results'}).find_all('tr')[1:]
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in jobs_list:
        row_cells = job_entry.find_all('td')
        job_class.title = row_cells[1].a.text.strip()
        job_class.info_link = row_cells[1].a['href']
        job_class.location = clean_location(row_cells[2].text)
        job_class.zip_code = city_to_zip(job_class.location)
        # Employment type only appears on the listing's own page.
        job_soup = get_soup(job_class.info_link)
        job_class.full_or_part = job_soup.find(
            text="Employment Duration:").parent.parent.b.text.strip()
        insert_count += job_insert(job_class)
    return insert_count
# Example #10
def run(url):
    """Scrape Paycom 'jobInfo' cards (JS-rendered) and insert each job.

    Returns:
        Number of rows successfully inserted via job_insert().
    """
    soup = get_javascript_soup(url)

    job_listings = soup.find_all('div', {'class': 'jobInfo'})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_listing in job_listings:
        # Hoist each span lookup: the original re-ran the same .find() up to
        # five times per card.
        title_span = job_listing.find('span', {'class': 'jobTitle'})
        job_class.title = title_span.a.text.strip()
        job_class.info_link = 'https://www.paycomonline.net' + title_span.a['href']
        location_text = job_listing.find('span', {'class': 'jobLocation'}).text
        if location_text:
            # Location text appears to be "<site> - <city>"; keep the city.
            job_class.location = globals.clean_location(location_text.split(' - ')[1])
            job_class.zip_code = globals.city_to_zip(job_class.location)
        description_text = job_listing.find('span', {'class': 'jobDescription'}).text
        if description_text:
            job_class.summary = description_text.strip()
        job_type_text = job_listing.find('span', {'class': 'jobType'}).text
        if job_type_text:
            lowered = job_type_text.lower()
            job_class.full_or_part = (
                'full' if ('ft' in lowered or 'full' in lowered) else 'part')
        insert_count += job_insert(job_class)
    return insert_count