示例#1
0
def get_filtered_jobs(request):
    """Return the ``Job`` query narrowed by the request's GET parameters.

    With no GET parameters the unfiltered query is returned.  Otherwise:

    * every parameter except ``exclude_ids`` and ``destinatary_or_empty`` is
      passed straight through to ``filter`` as a keyword lookup;
    * ``exclude_ids`` (comma-separated) removes jobs whose ``pk`` is listed;
    * ``destinatary_or_empty`` keeps jobs whose ``destinatary`` is the given
      value, ``None`` or the empty string.
    """
    jobs = Job.query()

    params = request.GET
    if not params:
        return jobs

    # These two keys get dedicated handling below and are not plain lookups.
    special_keys = ('exclude_ids', 'destinatary_or_empty')

    lookups = {}
    for key in params:
        if key not in special_keys:
            lookups[key] = params[key]
    jobs = jobs.filter(**lookups)

    if params.get('exclude_ids', None):
        excluded = params['exclude_ids'].split(',')
        jobs = jobs.exclude(pk__in=excluded)

    if params.get('destinatary_or_empty', None):
        accepted = (params['destinatary_or_empty'], None, '')
        jobs = jobs.filter(destinatary__in=accepted)

    return jobs
示例#2
0
def clear_database():
    """Clear the temporary database ahead of a new search.

    Deletes every stored ``Job`` entity.
    """
    # A keys-only fetch avoids loading each full entity just to delete it.
    for key in Job.query().fetch(keys_only=True):
        key.delete()
示例#3
0
    def post(self):
        """Handle a search submission: scrape Indeed and Dice, store and render jobs.

        Reads the ``job`` and ``location`` request parameters (falling back to
        "Software Engineer" / "San Jose, CA" when empty), clears the previously
        stored results, fetches the search page of both sites, saves each
        posting found as a ``Job`` entity and finally renders
        ``views/index.html`` with the stored Dice and Indeed jobs.
        """
        # Imported locally so the block is self-contained on Python 2 and 3.
        try:
            from urllib import quote_plus  # Python 2
        except ImportError:
            from urllib.parse import quote_plus  # Python 3

        # Clear database of previous results
        clear_database()

        # Example query: defaults to a Software Engineer search in San Jose.
        job = self.request.get("job") or "Software Engineer"
        location = self.request.get("location") or "San Jose, CA"

        # Percent-encode the user input for use in a query string.  HTML
        # escaping is the wrong tool for a URL: it turns "&" into "&amp;",
        # which splits the parameter.  quote_plus still maps " " to "+" and
        # "," to "%2C", and additionally encodes every other reserved
        # character.
        query = quote_plus(job.encode("utf-8"))
        place = quote_plus(location.encode("utf-8"))

        # Base Indeed & Dice URLs with the user's query filled in.
        indeed_url = "http://www.indeed.com/jobs?q=%s&l=%s" % (query, place)
        dice_url = "https://www.dice.com/jobs?q=%s&l=%s" % (query, place)

        indeed_soup = self._fetch_soup(indeed_url)
        dice_soup = self._fetch_soup(dice_url)

        # Indeed marks a rejected search with one of these two divs.
        bad_query = indeed_soup.find_all("div", {"class": "bad_query"})
        invalid_location = indeed_soup.find_all("div", {"class": "invalid_location"})

        if not bad_query and not invalid_location:
            self._store_indeed_jobs(indeed_soup)
            # NOTE(review): Dice results are only stored when Indeed accepted
            # the query -- confirm this coupling is intended.
            self._store_dice_jobs(dice_soup)
        else:
            print("Bad search query. Please check your spelling")

        # Query database for new jobs
        d_jobs = Job.query(Job.site == "dice").fetch()
        i_jobs = Job.query(Job.site == "indeed").fetch()
        self.response.out.write(template.render('views/index.html', {'d_jobs': d_jobs, 'i_jobs': i_jobs}))

    def _fetch_soup(self, url):
        """Download *url* and return it parsed with the ``html.parser`` backend."""
        page = urlopen(url)
        try:
            return BeautifulSoup(page, "html.parser")
        finally:
            # Release the connection even if parsing fails.
            page.close()

    def _store_indeed_jobs(self, soup):
        """Save one ``Job`` with site "indeed" per posting found in *soup*."""
        titles = soup.find_all("a", {"data-tn-element": "jobTitle"})
        companies = soup.find_all("span", {"class": "company"})
        locations = soup.find_all("span", {"class": "location"})
        summaries = soup.find_all("span", {"class": "summary"})
        links = soup.find_all("a", {"class": "turnstileLink"})

        # The five result lists are paired purely by position; zip stops at
        # the shortest one.
        for title, company, place, summary, link in zip(
                titles, companies, locations, summaries, links):
            print(title)
            if not title:
                continue
            i_job = Job()
            i_job.title = title.get_text().strip()
            i_job.company = company.get_text().strip()
            i_job.location = place.get_text().strip()
            i_job.description = summary.get_text().encode("utf8").strip()
            i_job.href = link.get("href")
            i_job.site = "indeed"
            i_job.put()

    def _store_dice_jobs(self, soup):
        """Save one ``Job`` with site "dice" per complete result found in *soup*."""
        results = soup.find_all('div', {'class': 'serp-result-content'})
        locations = soup.find_all("li", {"class": "location"})

        for result, place in zip(results, locations):
            link = result.find("a", {"class": "dice-btn-link"})
            employer = result.find("li", {"class": "employer"})
            summary = result.find("div", {"class": "shortdesc"})
            # find() returns None for a missing element; skip incomplete
            # results instead of failing the whole request.
            if link is None or employer is None or summary is None:
                continue

            title = link.get("title")
            if not title:
                continue

            d_job = Job()
            d_job.title = title.strip()
            d_job.company = employer.get_text().strip()
            d_job.description = summary.get_text().encode("utf8").strip()
            d_job.location = place.get_text()
            d_job.href = link.get('href')
            d_job.site = "dice"
            # Store to database
            d_job.put()