def run(url):
    """Scrape ADP Workforce Now openings (JS-rendered, delayed load).

    Parses each opening's title, optional location, relative post date, and
    worker category, then records the entry via update_db.
    """
    soup = get_javascript_soup_delayed(url, 'current-openings-item')
    for job_entry in soup.find_all('div', {'class': 'current-openings-details'}):
        globals.job_title = job_entry.find(
            'span', {'class': 'current-opening-title'}).text.strip()
        location_span = job_entry.find(
            'span', {'class': 'current-opening-location-item'})
        if location_span:
            globals.job_location = location_span.text.strip()
        # Post date is relative text like "3 days ago" / "a month ago" /
        # "Yesterday" / "30+ days ago"; convert to an absolute date.
        posted_ago = job_entry.find(
            'span', {'class': 'current-opening-post-date'}).text.split(' ')
        leading = posted_ago[0]
        if leading == 'a':
            # "a <unit> ago" means exactly one of that unit.
            globals.job_post_date = date_ago(1, posted_ago[1])
        elif leading.lower() == 'yesterday':
            globals.job_post_date = date_ago(1, 'day')
        elif leading == '30+':
            # Site caps the display at "30+ days ago"; record as 31 days.
            globals.job_post_date = date_ago(31, posted_ago[1])
        else:
            globals.job_post_date = date_ago(int(leading), posted_ago[1])
        # NOTE: class name "catergory" is misspelled on the site itself.
        category_span = job_entry.find(
            'span', {'class': 'current-opening-worker-catergory'})
        if category_span:
            globals.full_or_part = category_span.text.strip()
        # There are no per-job detail pages; everything links to the shared
        # ADP recruitment portal.
        globals.info_link = ('https://workforcenow.adp.com/mascsr/default/mdf/'
                             'recruitment/recruitment.html?cid=b4842dc2-cd32-4f0f-'
                             '88d3-b259fbc96f09&ccId=19000101_000001&type=MP&lang')
        globals.job_summary = globals.info_link
        update_db(organization)
        reset_vars()
def run(url):
    """Scrape job links out of a WPBakery wrapper page (JS-rendered)."""
    soup = get_javascript_soup(url)
    for anchor in soup.select('div.wpb_wrapper > p > a'):
        globals.job_title = anchor.text.strip()
        globals.info_link = anchor['href']
        update_db(organization)
def run(url):
    """Scrape <h4> job headings, then visit each listing for type and date."""
    soup = get_soup(url)
    for heading in soup.find_all('h4'):
        globals.job_title = heading.a.text
        globals.info_link = heading.a['href']
        globals.job_summary = globals.info_link
        listing_soup = get_soup(globals.info_link)
        # Employment type appears verbatim in a <p> on the listing page.
        if listing_soup.body.find_all('p', string="Job Type: Full-time"):
            globals.full_or_part = 'Full-time'
        elif listing_soup.body.find_all('p', string="Job Type: Part-time"):
            globals.full_or_part = 'Part-time'
        # Subtitle text ends "... <Month> <day>, <year>"; the day token
        # carries a trailing comma that must be dropped before int().
        date_text = listing_soup.body.find_all(
            'span', {'class': 'subtitle'})[0].text.split()
        month = month_to_num(date_text[2])
        day = int(date_text[3][:-1])
        year = int(date_text[4])
        globals.job_post_date = datetime(year, month, day)
        update_db(organization)
def run(url):
    """Scrape job anchors from the entry-content columns (JS-rendered)."""
    soup = get_javascript_soup(url)
    for anchor in soup.select('div.entry-content div.small-12.columns > p > a'):
        globals.job_title = anchor.text
        globals.info_link = anchor['href']
        update_db(organization)
def run(url):
    """Scrape 211 LA careers buttons for job titles and links.

    Args:
        url: The careers-page URL to scrape. FIX: the original ignored this
            parameter and hard-coded "https://www.211la.org/careers", unlike
            every other scraper in this module; the parameter is now honored
            (callers already pass the page URL, so behavior is unchanged for
            them).
    """
    soup = get_soup(url)
    for job_entry in soup.find_all("div", {"class": "jobBtn"}):
        # Each button div may wrap one or more anchors; record each.
        for anchor in job_entry.find_all("a"):
            globals.job_title = anchor.text
            globals.info_link = anchor.get('href')
            update_db(organization)
def run(url):
    """Scrape a Squarespace content block, skipping boilerplate paragraphs."""
    soup = get_soup(url)
    jobs_div = soup.find('div', {'class': 'sqs-block-content'})
    paragraphs = jobs_div.find_all('p')
    # The first 4 and last 3 <p> tags are page boilerplate, not job links.
    for job_entry in paragraphs[4:-3]:
        globals.job_title = job_entry.a.text.strip()
        globals.info_link = 'https://lafh.org' + job_entry.a['href']
        update_db(organization)
def run(url):
    """Scrape a SmartRecruiters job table (JS-rendered), one row per job."""
    soup = get_javascript_soup(url)
    rows = soup.find('table', {'class': 'srJobList'}).tbody.find_all('tr')
    # First <tr> is the table header; skip it.
    for job_entry in rows[1:]:
        globals.job_title = job_entry.find(
            'td', {'class': 'srJobListJobTitle'}).text.strip()
        # The row's onclick handler embeds the listing URL; the fixed-width
        # slice strips the 13-char JS prefix and 3-char suffix around it.
        onclick = job_entry['onclick']
        globals.info_link = onclick[13:-3]
        globals.full_or_part = job_entry.find(
            'td', {'class': 'srJobListTypeOfEmployment'}).text
        globals.job_location = clean_location(
            job_entry.find('td', {'class': 'srJobListLocation'}).text)
        globals.job_zip_code = city_to_zip(globals.job_location)
        update_db(organization)
def run(url):
    """Scrape the first CATS job grid on the page for titles and locations."""
    soup = get_soup(url)
    grid = soup.select('div[class*="JobGrid-"]')[0]
    for job_entry in grid.find_all('a'):
        globals.info_link = 'https://path.catsone.com' + job_entry['href']
        # Row layout: div[0] = title, div[2] = location.
        cells = job_entry.find('div', {'class': 'row'}).find_all('div')
        globals.job_title = cells[0].text.strip()
        globals.job_location = clean_location(cells[2].text.strip())
        globals.job_zip_code = city_to_zip(globals.job_location)
        update_db(organization)
def run(url):
    """Scrape a WP category-list of jobs; pull the summary from each listing."""
    soup = get_soup(url)
    category_list = soup.find('ul', {'class': 'lcp_catlist'})
    for job_entry in category_list.find_all('li'):
        globals.job_title = job_entry.a.text.strip()
        globals.info_link = job_entry.a['href']
        job_soup = get_soup(globals.info_link)
        # The summary is the paragraph containing "Position Purpose:";
        # a missing marker means the page layout changed — fail loudly.
        summary_match = job_soup.find(text=re.compile("Position Purpose:"))
        if summary_match is None:
            raise globals.ParseError(globals.info_link, 'Cannot find job summary')
        globals.job_summary = summary_match.parent.parent.text
        update_db(organization)
def run(url):
    """Scrape governmentjobs.com listings for LAHSA, following pagination."""
    globals.job_post_date = ''
    soup = get_javascript_soup_delayed(url, 'job-table-title')
    while soup:
        for job_row in soup.find('tbody').find_all('tr'):
            title_cell = job_row.find('td', {'class': 'job-table-title'})
            globals.job_title = title_cell.a.text.strip()
            globals.info_link = ('https://www.governmentjobs.com'
                                 + title_cell.a['href'])
            globals.salary = job_row.find(
                'td', {'class': 'job-table-salary'}).text
            globals.full_or_part = job_row.find(
                'td', {'class': 'job-table-type'}).text
            # Fetch the listing page itself for location and summary.
            job_soup = get_soup(globals.info_link)
            info_container = job_soup.find('div', {'class': 'summary container'})
            globals.job_location = clean_location(
                info_container.find('div', {'id': 'location-label-id'})
                .parent.find_all('div')[2].text)
            globals.job_zip_code = city_to_zip(globals.job_location)
            globals.job_summary = job_soup.find(
                'div', {'id': 'details-info'}).find('p').text
            update_db(organization)
            reset_vars()
        # Follow the "next page" control until it is marked disabled.
        next_control = soup.find('li', {'class': 'PagedList-skipToNext'})
        if 'disabled' in next_control.get("class"):
            soup = False
        else:
            next_page_url = ('https://www.governmentjobs.com/careers/lahsa?'
                             + next_control.a['href'].split('?')[1])
            soup = get_javascript_soup_delayed(next_page_url, 'job-table-title')
def run(url):
    """Scrape a jobs.net results table, visiting each posting for details."""
    globals.job_post_date = ''
    soup = get_soup(url)
    results_table = soup.find('table', {'id': 'job-result-table'})
    for job_row in results_table.find_all('tr', {'class': 'job-result'}):
        title_cell = job_row.find('td', {'class': 'job-result-title-cell'})
        globals.job_title = title_cell.a.text.strip()
        globals.info_link = ('https://pennylanecenters.jobs.net'
                             + title_cell.a['href'])
        globals.job_summary = globals.info_link
        globals.job_location = clean_location(
            job_row.find('div', {'class': 'job-location-line'}).text)
        globals.job_zip_code = city_to_zip(globals.job_location)
        # Employment type and posted date live on the posting page itself.
        job_soup = get_soup(globals.info_link)
        globals.full_or_part = job_soup.find(
            'li', {'class': 'job-employee-type'}).find(
            'div', {'class': 'secondary-text-color'}).text
        globals.job_post_date = string_to_date(job_soup.find(
            'li', {'class': 'job-date-posted'}).find(
            'div', {'class': 'secondary-text-color'}).text)
        update_db(organization)
        reset_vars()
def run(url):
    """Scrape article paragraphs that contain "Posted M/D/YYYY" job entries."""
    soup = get_soup(url)
    for job_entry in soup.find('article').find_all('p'):
        # Only paragraphs carrying a "Posted " date are job entries.
        if 'Posted ' not in job_entry.text:
            continue
        link = job_entry.find('a')
        globals.job_title = link.text
        globals.info_link = link['href']
        globals.job_summary = globals.info_link
        # Date trails the text as "Posted <month>/<day>/<year>".
        parts = job_entry.text.split('Posted ')[1].split('/')
        globals.job_post_date = datetime(
            int(parts[2]), int(parts[0]), int(parts[1]))
        update_db(organization)
        reset_vars()
def run(url):
    """Scrape a Paylocity recruiting board (JS-rendered).

    For each listing: title, link, post date, and location come from the
    board page; zip code, employment status, and salary come from the
    listing page's leading paragraphs.
    """
    soup = get_javascript_soup(url)
    for job_listing in soup.find_all('div', {'class': 'job-listing-job-item'}):
        job_description = job_listing.find_all('span')
        # First span holds the title anchor.
        globals.job_title = job_description[0].a.text
        globals.info_link = ('https://recruiting.paylocity.com'
                             + job_description[0].a['href'])
        globals.job_summary = globals.info_link
        # Second span holds the date; it may carry a trailing " -" which
        # must be removed before splitting on '/'.
        date = job_description[1].text
        if date[len(date) - 2] == '-':
            date = date[0:len(date) - 3]
        parts = date.strip().split('/')
        globals.job_post_date = datetime(
            int(parts[2]), int(parts[0]), int(parts[1]))
        globals.job_location = job_listing.find(
            'div', {'class': 'location-column'}).span.text
        # Listing page lays out Location / Status / Salary paragraphs.
        listing_soup = get_soup(globals.info_link)
        listing_body = listing_soup.find('body').find_all('p')
        if 'Location' in listing_body[0].text:
            location_string = listing_body[0].text.split(':')[1].lstrip()
            zip_code_result = re.search(r'(\d{5})', location_string)
            if zip_code_result is not None:  # FIX: was `!= None`
                globals.job_zip_code = zip_code_result.group(1)
            # Fallback when no 5-digit zip appears in the location text.
            # (Can't extract a city here: the text has no standard form —
            # "Hollywood", "Koreatown, Los Angeles, California", or even
            # "Multiple Locations".)
            if len(globals.job_zip_code) == 0:
                # FIX: was `globals.city_to_zip(...)` — the helper is called
                # bare (`city_to_zip`) everywhere else in this module, so the
                # attribute lookup on `globals` would fail.
                globals.job_zip_code = city_to_zip(globals.job_location)
        if 'Status' in listing_body[1].text:
            globals.full_or_part = listing_body[1].text[8:]
        if 'Salary' in listing_body[2].text:
            globals.salary = listing_body[2].text[14:]
        update_db(organization)