def run(url):
    """Scrape job postings from an ADP Workforce Now careers page and
    push each one into the shared globals/db pipeline."""
    soup = get_javascript_soup_delayed(url, 'current-openings-item')

    for job_entry in soup.find_all('div', {'class':'current-openings-details'}):
        globals.job_title = job_entry.find('span', {'class':'current-opening-title'}).text.strip()

        location_span = job_entry.find('span', {'class':'current-opening-location-item'})
        if location_span:
            globals.job_location = location_span.text.strip()

        # The post date is shown relative to today ("3 days ago",
        # "a month ago", "30+ days ago"); convert it to an actual date.
        posted_ago = job_entry.find('span', {'class':'current-opening-post-date'}).text.split(' ')
        first_word = posted_ago[0]
        if first_word == 'a':
            globals.job_post_date = date_ago(1, posted_ago[1])
        elif first_word.lower() == 'yesterday':
            globals.job_post_date = date_ago(1, 'day')
        elif first_word == '30+':
            # The site caps the displayed age at "30+"; record as 31 units ago.
            globals.job_post_date = date_ago(31, posted_ago[1])
        else:
            globals.job_post_date = date_ago(int(first_word), posted_ago[1])

        category_span = job_entry.find('span', {'class':'current-opening-worker-catergory'})
        if category_span:
            globals.full_or_part = category_span.text.strip()

        # No per-job detail page is available; link to the shared portal.
        globals.info_link = 'https://workforcenow.adp.com/mascsr/default/mdf/recruitment/recruitment.html?cid=b4842dc2-cd32-4f0f-88d3-b259fbc96f09&ccId=19000101_000001&type=MP&lang'
        globals.job_summary = globals.info_link
        update_db(organization)
        reset_vars()
def run(url):
    """Scrape job title/link pairs from anchors nested in wpb_wrapper paragraphs."""
    soup = get_javascript_soup(url)
    for anchor in soup.select('div.wpb_wrapper > p > a'):
        globals.job_title = anchor.text.strip()
        globals.info_link = anchor['href']
        update_db(organization)
# Example 3
def run(url):
    """Scrape jobs listed as <h4> anchors, following each link for
    employment type and post date."""
    soup = get_soup(url)

    for heading in soup.find_all('h4'):
        link = heading.a
        globals.job_title = link.text
        globals.info_link = link['href']
        globals.job_summary = globals.info_link

        listing_soup = get_soup(globals.info_link)
        body = listing_soup.body

        # Employment type appears as a literal "Job Type: ..." paragraph.
        if body.find_all('p', string="Job Type: Full-time"):
            globals.full_or_part = 'Full-time'
        elif body.find_all('p', string="Job Type: Part-time"):
            globals.full_or_part = 'Part-time'

        # Subtitle text ends in "<month> <day>, <year>" at word positions 2-4.
        date_text = body.find_all('span', {'class': 'subtitle'})[0].text.split()
        month = month_to_num(date_text[2])
        day = int(date_text[3][:-1])  # drop the trailing comma
        year = int(date_text[4])
        globals.job_post_date = datetime(year, month, day)

        update_db(organization)
def run(url):
    """Scrape job anchors from the page's entry-content columns."""
    soup = get_javascript_soup(url)

    for anchor in soup.select('div.entry-content div.small-12.columns > p > a'):
        globals.job_title = anchor.text
        globals.info_link = anchor['href']
        update_db(organization)
def run(url):
    """Scrape 211 LA job buttons.

    NOTE(review): the incoming `url` argument is ignored; the careers page
    address is hard-coded below (kept as-is to preserve behavior).
    """
    soup = get_soup("https://www.211la.org/careers")

    for job_button in soup.find_all("div", {"class": "jobBtn"}):
        # NOTE(review): if a button holds several anchors only the last one
        # survives to the single update_db call — matches original behavior.
        for anchor in job_button.find_all("a"):
            globals.job_title = anchor.text
            globals.info_link = anchor.get('href')
        update_db(organization)
# Example 6
def run(url):
    """Scrape job links from a Squarespace content block, skipping the
    boilerplate paragraphs that surround the listings."""
    soup = get_soup(url)
    paragraphs = soup.find('div', {'class':'sqs-block-content'}).find_all('p')

    # The first 4 and last 3 paragraphs are page furniture, not jobs.
    for job_entry in paragraphs[4:-3]:
        globals.job_title = job_entry.a.text.strip()
        globals.info_link = 'https://lafh.org' + job_entry.a['href']
        update_db(organization)
def run(url):
    """Scrape a JavaScript-rendered srJobList table, one job per row."""
    soup = get_javascript_soup(url)
    rows = soup.find('table',{'class':'srJobList'}).tbody.find_all('tr')

    # First row is the table header.
    for row in rows[1:]:
        globals.job_title = row.find('td',{'class':'srJobListJobTitle'}).text.strip()
        # The listing URL is embedded in the row's onclick handler;
        # strip the 13-char JS prefix and 3-char suffix around it.
        onclick = row['onclick']
        globals.info_link = onclick[13:-3]
        globals.full_or_part = row.find('td',{'class':'srJobListTypeOfEmployment'}).text
        globals.job_location = clean_location(row.find('td',{'class':'srJobListLocation'}).text)
        globals.job_zip_code = city_to_zip(globals.job_location)
        update_db(organization)
def run(url):
    """Scrape PATH job listings from a CATS-hosted job grid."""
    soup = get_soup(url)
    job_grid = soup.select('div[class*="JobGrid-"]')[0]

    for anchor in job_grid.find_all('a'):
        globals.info_link = 'https://path.catsone.com' + anchor['href']
        # Within each row div: cell 0 is the title, cell 2 the location.
        cells = anchor.find('div', {'class':'row'}).find_all('div')
        globals.job_title = cells[0].text.strip()
        globals.job_location = clean_location(cells[2].text.strip())
        globals.job_zip_code = city_to_zip(globals.job_location)
        update_db(organization)
def run(url):
    """Scrape a category list of jobs and pull each listing's summary,
    raising ParseError when the summary marker is missing."""
    soup = get_soup(url)

    for item in soup.find('ul', {'class': 'lcp_catlist'}).find_all('li'):
        globals.job_title = item.a.text.strip()
        globals.info_link = item.a['href']
        job_soup = get_soup(globals.info_link)
        # The summary lives in the block enclosing "Position Purpose:".
        summary_match = job_soup.find(text=re.compile("Position Purpose:"))
        if summary_match is None:
            raise globals.ParseError(globals.info_link,
                                     'Cannot find job summary')
        globals.job_summary = summary_match.parent.parent.text
        update_db(organization)
# Example 10
def run(url):
    """Scrape paginated governmentjobs.com listings, following the
    pager's "next" link until it is disabled."""
    globals.job_post_date = ''
    next_page_url = url
    soup = get_javascript_soup_delayed(next_page_url, 'job-table-title')

    # One iteration per results page; soup is set to False once the
    # "skip to next" pager item carries the 'disabled' class.
    while soup:
        job_table = soup.find('tbody')
        for job_row in job_table.find_all('tr'):
            globals.job_title = job_row.find('td', {
                'class': 'job-table-title'
            }).a.text.strip()
            globals.info_link = 'https://www.governmentjobs.com' + job_row.find(
                'td', {
                    'class': 'job-table-title'
                }).a['href']
            globals.salary = job_row.find('td', {
                'class': 'job-table-salary'
            }).text
            globals.full_or_part = job_row.find('td', {
                'class': 'job-table-type'
            }).text
            # Get soup for job listing to get more info
            job_soup = get_soup(globals.info_link)
            info_container = job_soup.find('div',
                                           {'class': 'summary container'})
            # Location text is the third div under the location label's parent.
            globals.job_location = clean_location(
                info_container.find('div', {
                    'id': 'location-label-id'
                }).parent.find_all('div')[2].text)
            globals.job_zip_code = city_to_zip(globals.job_location)
            globals.job_summary = job_soup.find('div', {
                'id': 'details-info'
            }).find('p').text
            update_db(organization)
            reset_vars()
        if not 'disabled' in soup.find('li', {
                'class': 'PagedList-skipToNext'
        }).get("class"):
            # Rebuild the next-page URL from the pager link's query string.
            next_page_url = 'https://www.governmentjobs.com/careers/lahsa?' + soup.find(
                'li', {
                    'class': 'PagedList-skipToNext'
                }).a['href'].split('?')[1]
            soup = get_javascript_soup_delayed(next_page_url,
                                               'job-table-title')
        else:
            soup = False
def run(url):
    """Scrape Penny Lane Centers postings from a jobs.net result table,
    following each listing for employment type and post date."""
    globals.job_post_date = ''
    soup = get_soup(url)
    results_table = soup.find('table',{'id':'job-result-table'})

    for row in results_table.find_all('tr',{'class':'job-result'}):
        title_cell = row.find('td',{'class':'job-result-title-cell'})
        globals.job_title = title_cell.a.text.strip()
        globals.info_link = 'https://pennylanecenters.jobs.net' + title_cell.a['href']
        globals.job_summary = globals.info_link
        globals.job_location = clean_location(row.find('div',{'class':'job-location-line'}).text)
        globals.job_zip_code = city_to_zip(globals.job_location)
        # Employment type and post date only appear on the listing page itself.
        job_soup = get_soup(globals.info_link)
        globals.full_or_part = job_soup.find('li',{'class':'job-employee-type'}).find('div',{'class':'secondary-text-color'}).text
        globals.job_post_date = string_to_date(job_soup.find('li',{'class':'job-date-posted'}).find('div',{'class':'secondary-text-color'}).text)
        update_db(organization)
        reset_vars()
# Example 12
def run(url):
    """Scrape jobs from an article whose paragraphs embed a
    "Posted m/d/y" marker after each job link."""
    soup = get_soup(url)

    for paragraph in soup.find('article').find_all('p'):
        # Paragraphs without a "Posted " marker are not job entries.
        if 'Posted ' not in paragraph.text:
            continue
        link = paragraph.find('a')
        globals.job_title = link.text
        globals.info_link = link['href']
        globals.job_summary = globals.info_link
        # Date trails the marker as month/day/year.
        parts = paragraph.text.split('Posted ')[1].split('/')
        globals.job_post_date = datetime(int(parts[2]), int(parts[0]), int(parts[1]))
        update_db(organization)
        reset_vars()
# Example 13
def run(url):
    """Scrape Paylocity-hosted job listings, following each posting's page
    for location, zip code, employment status, and salary.

    Fix: the zip-code fallback previously called ``globals.city_to_zip``;
    every other scraper in this file calls the module-level
    ``city_to_zip`` helper directly (see the other run() variants), so the
    attribute lookup on ``globals`` would raise AttributeError.
    """
    soup = get_javascript_soup(url)
    job_listings = soup.find_all('div',{'class':'job-listing-job-item'})

    for job_listing in job_listings:
        job_description = job_listing.find_all('span')
        # Title and link come from the anchor in the first span.
        globals.job_title = job_description[0].a.text
        globals.info_link = 'https://recruiting.paylocity.com' + job_description[0].a['href']
        globals.job_summary = globals.info_link
        # The second span holds the post date as m/d/y, sometimes with a
        # trailing " -" decoration that must be stripped first.
        date = job_description[1].text
        if date[len(date)-2] == '-':
            date = date[0:len(date)-3]
        date = date.strip().split('/')
        month = int(date[0])
        day = int(date[1])
        year = int(date[2])
        globals.job_post_date = datetime(year, month, day)
        # Location as shown in the listing grid's location column.
        globals.job_location = job_listing.find('div',{'class':'location-column'}).span.text
        # Fetch the posting's own page for the remaining fields.
        listing_soup = get_soup(globals.info_link)
        listing_body = listing_soup.find('body').find_all('p')
        # Retrieve Full/Part-time and Salary info if available
        if 'Location' in listing_body[0].text:
            location_string = listing_body[0].text.split(':')[1].lstrip()
            zip_code_result = re.search(r'(\d{5})', location_string)
            if zip_code_result is not None:
                globals.job_zip_code = zip_code_result.group(1)
            # can't get city since there's no standard. It could be
            # "Hollywood", "Koreatown, Los angeles, California", or even
            # "Multiple Locations"
        if len(globals.job_zip_code) == 0:
            # Fall back to deriving the zip from the city name, using the
            # same module-level helper as the other scrapers in this file.
            globals.job_zip_code = city_to_zip(globals.job_location)
        if 'Status' in listing_body[1].text:
            globals.full_or_part = listing_body[1].text[8:]
        if 'Salary' in listing_body[2].text:
            globals.salary = listing_body[2].text[14:]
        update_db(organization)