def job_profile_table_edited(self):
    '''Commit the edited job profile to the database and kick off a search
    thread that refreshes results for the new profile version.'''
    # Load the existing profile, or start a fresh one if none is stored yet.
    jobprofile = None
    try:
        with CommonFuncs.get_db() as db:
            jobprofile = db.query(JobProfile).one()
    except Exception:
        pass  # no profile row yet; a fresh one is created below
    if not jobprofile:
        jobprofile = JobProfile()

    # Copy each visible table cell onto the matching JobProfile column.
    job_profile_fields = JobProfile.__table__.columns.keys()
    i = 0
    for field in job_profile_fields:
        if not field == 'id':  # skip the id value, which is autoincremented
            try:
                cell_text = self.ui.jobprofile_table.item(i, 0).text()
                setattr(jobprofile, field, cell_text)
            except Exception:
                pass  # empty/missing cell; keep the existing value
        else:
            i -= 1  # reset the index, so we don't skip a row
        i += 1

    with CommonFuncs.get_db() as db:
        db.add(jobprofile)
        db.commit()

    # If the user has checked that they want to delete unprocessed jobs on
    # job profile edit, delete them (skipped during app initialization).
    if not self.init_process:
        with CommonFuncs.get_db() as db:
            settings = None  # BUG FIX: was unbound (NameError) when the query failed
            try:
                settings = db.query(JobbybotSettings).one()
            except Exception:
                pass
            if settings and settings.delete_ujobs_on_jprofile_edit == True:
                db.query(UnprocessedJob).delete(synchronize_session=False)
                db.commit()

    CommonFuncs.log(self, 'committed update to job profile for user')
    CommonFuncs.log(self, 'starting query thread to find jobs related to their profile')
    self.threads['job_profile_search_results'] = JobProfileResultsThread()
    self.threads['job_profile_search_results'].started.connect(self.job_profile_search_results_started)
    self.threads['job_profile_search_results'].finished.connect(self.job_profile_search_results_finished)
    self.threads['job_profile_search_results'].start()
def file_select(self):
    '''Upload a file path to the user's job profile in the db on click in cell.'''
    CommonFuncs.log(self, 'attempting to update a user job file')
    for selected in self.ui.jobprofile_table.selectedItems():
        current_row = selected.row()
        field_name = self.ui.jobprofile_table.verticalHeaderItem(selected.row()).text()
        # Only document-type fields open a file picker.
        dialog = None
        if field_name in ('resume', 'cover_letter'):
            dialog = QFileDialog()
            dialog.setFileMode(QFileDialog.AnyFile)
        elif field_name == 'supporting_docs':
            dialog = QFileDialog()
            dialog.setFileMode(QFileDialog.ExistingFiles)
        if dialog and dialog.exec_():
            chosen_files = dialog.selectedFiles()
            with CommonFuncs.get_db() as db:
                profile = db.query(JobProfile).one()
                setattr(profile, field_name, str(chosen_files))
                db.add(profile)
                db.commit()
                CommonFuncs.log(self, 'successfully committed doc to job profile: %s' % str(chosen_files))
            # NOTE(review): writes to row-1 / column 1 — mirrors how the table is
            # populated elsewhere in this file; confirm the offset is intended.
            self.ui.jobprofile_table.setItem(current_row - 1, 1, QTableWidgetItem(str(chosen_files)))
        break  # only the first selected cell is processed
def play_selected_bot_thread(self):
    '''Run the bot selected from the dropdown menu.'''
    job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
    CommonFuncs.log(self, 'attempting to play selected: %s' % job_site_bot_name)
    # Mark the bot as running, creating its account row if it doesn't exist.
    with CommonFuncs.get_db() as db:
        try:
            bot = CommonFuncs.get_bot(job_site_bot_name)
            bot.is_running = True
        except:
            bot = JobSiteAccount()
            bot.site_bot_name = job_site_bot_name
            bot.is_running = True
            db.add(bot)
            db.commit()
    # Refuse to start without stored login credentials.
    jobsiteaccount = CommonFuncs.get_bot(job_site_bot_name)
    if jobsiteaccount.username is None or jobsiteaccount.password is None:
        CommonFuncs.log(self, 'no valid login creds available')
        CommonFuncs.log(self, 'playing of bot canceled')
        return
    applier = bot_threads[job_site_bot_name]['applier']
    if applier is None or not applier.isRunning():
        # Only build the thread if one doesn't already exist / isn't running.
        new_thread = BotThread(job_site_bot_name)
        bot_threads[job_site_bot_name]['applier'] = new_thread
        new_thread.started.connect(self.bot_thread_started)
        new_thread.finished.connect(self.bot_thread_finished)
        new_thread.start()
        CommonFuncs.log(self, 'playing of %s successful!' % job_site_bot_name)
    else:
        CommonFuncs.log(self, 'playing of %s unsuccessful!' % job_site_bot_name)
def start_requests(self):
    '''Return iterable of scrapy Requests over Indeed query urls.'''
    # Stop crawling once a backlog of 100+ unprocessed Indeed jobs exists.
    with CommonFuncs.get_db() as db:
        backlog = db.query(UnprocessedJob).filter(UnprocessedJob.bot_type == 'Indeed_Bot').all()
        if len(backlog) >= 100:
            return
    start_time = datetime.now()
    job_profile = CommonFuncs.get_job_profile()
    locations = CommonFuncs.get_locations_list(job_profile)
    query_list = CommonFuncs.build_query_string(job_profile=job_profile,
                                                or_delim='or',
                                                bracket1='(',
                                                bracket2=')',
                                                adv_supp=True)
    query_string = query_list[0]
    if len(query_string) == 0:
        return
    ##########
    # URL ENCODE EACH QUERY (one url per location)
    ##########
    start_urls = []
    for location in locations:
        encoded_query = urllib.parse.urlencode({'q': query_string, 'l': location}, safe='')
        start_urls.append(JOB_SITE_LINKS['Indeed']['query'] + '&' + encoded_query)
    # CommonFuncs.log('time spent building start_urls for Indeed: ' + str(datetime.now() - start_time))
    ##########
    # GET URL RESPONSES AND CALL PARSE FUNCTION TO ITERATE OVER PAGES
    ##########
    for url in start_urls:
        yield scrapy.Request(url=url, callback=self.parse)
def delete_selected_job_site(self):
    '''After user confirmation, delete the selected site's account creds and
    any unprocessed jobs queued for it, then refresh the account section.'''
    msg = QMessageBox()  # confirmation dialog
    msg.setIcon(QMessageBox.Critical)
    msg.setText("Your login creds and unprocessed jobs will be deleted for this site.")
    msg.setInformativeText("Are you sure you want to continue?")
    msg.setWindowTitle("Warning About Deletion: Irreversible")
    msg.setStandardButtons(QMessageBox.Ok | QMessageBox.Cancel)
    if msg.exec() != QMessageBox.Ok:
        return
    self.ui.deleteload_lbl.show()
    job_site = self.ui.jobsite_select.currentText() + '_Bot'
    with CommonFuncs.get_db() as db:
        # DELETE ACCOUNT
        account = CommonFuncs.get_bot(job_site)
        if account is not None:
            db.delete(account)
            db.commit()
            CommonFuncs.log(self, 'successfully deleted account for: ' + job_site)
        # DELETE ANY UNPROCESSED JOBS
        db.query(UnprocessedJob).filter(UnprocessedJob.bot_type == job_site).delete(synchronize_session=False)
        db.commit()
        CommonFuncs.log(self, 'successfully deleted all unprocessed jobs for account: ' + job_site)
    self.job_site_account_select()  # refresh job site account section of gui
    self.ui.deleteload_lbl.hide()
def job_site_account_select(self):
    '''Load the user's account creds and job stats for the selected site.'''
    job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
    CommonFuncs.log(self, 'starting to find the account creds and stats for the user after job site account select')
    todo_count = 0
    applied_count = 0
    try:
        with CommonFuncs.get_db() as db:
            todo_count = len(db.query(UnprocessedJob).filter(
                UnprocessedJob.bot_type == job_site_bot_name).all())
            applied_count = len(db.query(Job).filter(and_(
                Job.job_site == JOB_SITE_LINKS[self.ui.jobsite_select.currentText()]['job_site'],
                Job.applied == True)).all())
    except:
        pass  # leave both counts at zero when the queries fail
    self.ui.todoforsite_btn.setText(str(todo_count))
    self.ui.appliedforsite_btn.setText(str(applied_count))
    account = CommonFuncs.get_bot(job_site_bot_name)
    if not account:
        # No stored creds: blank out the boxes.
        self.ui.jobsiteusername_box.setText('')
        self.ui.jobsitepassword_box.setText('')
    else:
        self.ui.jobsiteusername_box.setText(account.username)
        self.ui.jobsitepassword_box.setText(account.password)
        if account.is_running:
            self.ui.playload_lbl.show()
        else:
            self.ui.playload_lbl.hide()
    self.ui.jobsiteusername_box.setStyleSheet('background-color: white')
    self.ui.jobsitepassword_box.setStyleSheet('background-color: white')
    self.ui.verify_btn.setIcon(QtGui.QIcon(STATIC_FILES['checked']))
    CommonFuncs.log(self, 'finished finding the account creds and stats for the user after job site account select')
def run(self):
    '''Poll the Job table forever and emit update_tables whenever its contents change.'''
    self.isRunning()
    self.set_error(False)  # reset error
    cached_results = []
    while True:
        latest_results = []
        with CommonFuncs.get_db() as db:
            jobs = db.query(Job).all()
            if jobs is not None:
                latest_results += jobs
        # Only signal the gui when the table contents actually changed.
        if cached_results != latest_results:
            cached_results = latest_results
            self.update_tables.emit()
        sleep(QTHREAD_SLEEP_TIMES['database_tables'])
def pause_selected_bot_thread(self):
    '''Flag the selected site's bot as not running so its applier loop stops.'''
    job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
    CommonFuncs.log(self, 'attempting to send pause signal to bot: %s' % job_site_bot_name)
    with CommonFuncs.get_db() as db:
        try:
            bot = CommonFuncs.get_bot(job_site_bot_name)
            if bot.is_running:
                self.ui.pauseload_lbl.show()  # only show loading gif if there is bot to pause
                bot.is_running = False
                db.add(bot)
                db.commit()
        except:
            CommonFuncs.log(self, 'problem sending pause signal for bot: %s' % job_site_bot_name, level='debug')
    CommonFuncs.log(self, 'pause signal for %s successfully sent' % job_site_bot_name)
def verify_job_site_account_thread_finished(self):
    '''Re-enable the creds widgets, then either warn about a failed login or
    commit the newly verified creds to the database.'''
    CommonFuncs.log(self, 'completed verification process of account creds')
    self.ui.jobsiteusername_box.setEnabled(True)
    self.ui.jobsitepassword_box.setEnabled(True)
    self.ui.jobsite_select.setEnabled(True)
    self.ui.jobsiteaccountcancel_btn.setEnabled(True)
    self.ui.verifyload_lbl.hide()
    self.ui.jobsiteaccountcancel_btn.setIcon(QIcon(STATIC_FILES['revert']))
    if self.threads['verify_job_site_account'].error == True:
        # Login failed: mark the boxes red and tell the user.
        self.ui.jobsiteusername_box.setStyleSheet('background-color: rgb(247, 126, 74)')
        self.ui.jobsitepassword_box.setStyleSheet('background-color: rgb(247, 126, 74)')
        self.ui.verify_btn.setIcon(QtGui.QIcon(STATIC_FILES['submit']))  # show the site creds need to be verified
        msg = QMessageBox()  # show error message
        msg.setIcon(QMessageBox.Critical)
        msg.setText("Job Site Account Verification Failed")
        msg.setInformativeText("Please correct your username and password and try again.")
        msg.setWindowTitle("Job Site Login Failed")
        msg.setStandardButtons(QMessageBox.Ok)
        msg.exec()
        return
    # COMMIT THE JOB SITE ACCOUNT CREDS
    bot_name = str(self.ui.jobsite_select.currentText()) + '_Bot'
    account = None
    try:
        account = CommonFuncs.get_bot(bot_name)
    except:
        pass
    if not account:
        account = JobSiteAccount()
        account.site_bot_name = bot_name
    account.username = self.ui.jobsiteusername_box.text()
    account.password = self.ui.jobsitepassword_box.text()
    with CommonFuncs.get_db() as db:
        db.add(account)
        db.commit()
    CommonFuncs.log(self, 'successfully stored valid account creds')
    self.ui.verify_btn.setIcon(QtGui.QIcon(STATIC_FILES['checked']))  # show the site creds have been verified
    self.ui.jobsiteusername_box.setStyleSheet('background-color: rgb(70, 188, 128)')
    self.ui.jobsitepassword_box.setStyleSheet('background-color: rgb(70, 188, 128)')
def run(self):
    '''Poll the db for job/application counts and emit them as a stringified
    dict on the `stats` signal, forever.'''
    self.isRunning()
    self.set_error(False)  # reset error
    while True:
        stats_dict = {}
        with CommonFuncs.get_db() as db:
            # Overall counts; each falls back to 0 when its query fails.
            try:
                processed = len(db.query(Job).all())
            except Exception:
                processed = 0
            stats_dict['processed'] = str(processed)
            try:
                applied = len(db.query(Job).filter(Job.applied == True).all())
            except Exception:
                applied = 0
            stats_dict['applied'] = str(applied)
            try:
                todo = len(db.query(UnprocessedJob).all())
            except Exception:
                todo = 0
            stats_dict['todo'] = str(todo)
            # Per-site todo/applied counts.
            for j_site in JOB_SITE_LINKS:
                try:
                    bot_name = j_site + '_Bot'
                    try:
                        todo_count = len(db.query(UnprocessedJob).filter(
                            UnprocessedJob.bot_type == bot_name).all())
                        applied_count = len(db.query(Job).filter(and_(
                            Job.job_site == JOB_SITE_LINKS[j_site]['job_site'],
                            Job.applied == True)).all())
                    except Exception:
                        todo_count = 0
                        applied_count = 0
                    stats_dict[bot_name + '_todo'] = str(todo_count)
                    stats_dict[bot_name + '_applied'] = str(applied_count)
                except Exception:
                    pass
        # BUG FIX: was `stats_dict = str(stats_dict)` followed by
        # `emit(str(stats_dict))` — a redundant double str() conversion.
        self.stats.emit(str(stats_dict))
        sleep(QTHREAD_SLEEP_TIMES['stats'])
def search_table_btn_clicked(self):
    '''Run a substring query of jobs in the db and render the results,
    one column per matching Job, in the jobs table.'''
    # NOTE(review): this log call omits `self` while others pass it —
    # confirm CommonFuncs.log accepts both call shapes.
    CommonFuncs.log('running query on jobs in db')
    try:
        search_string = self.ui.search_box.text()
        results = {'jobs': []}
        self.ui.jobs_table.clear()
        job_fields = Job.__table__.columns.keys()
        # Match the search string against every Job column.
        with CommonFuncs.get_db() as db:
            for field in job_fields:
                field_results = db.query(Job).filter(
                    getattr(Job, field).contains(search_string)).all()
                if field_results:
                    results['jobs'] += field_results
        results['jobs'] = list(set(results['jobs']))  # remove duplicates
        self.ui.jobs_table.setRowCount(len(job_fields))
        # BUG FIX: the stretch mode was applied to jobprofile_table's vertical
        # header while populating jobs_table; use the jobs table header here.
        header = self.ui.jobs_table.verticalHeader()
        header.setSectionResizeMode(QHeaderView.Stretch)
        for i, field in enumerate(job_fields):  # build headers
            self.ui.jobs_table.setVerticalHeaderItem(i, QTableWidgetItem(field))
        results['jobs'].sort(key=lambda x: x.app_date and x.link_to_job, reverse=True)  # sort by app_date
        if results['jobs']:
            self.ui.jobs_table.setColumnCount(len(results['jobs']))
            for col, result in enumerate(results['jobs']):
                for row, field in enumerate(job_fields):
                    cell_val = getattr(result, field)
                    if cell_val is None:
                        cell_val = ''
                    self.ui.jobs_table.setItem(row, col, QTableWidgetItem(str(cell_val)))
        else:
            self.ui.jobs_table.setColumnCount(0)
        # BUG FIX: success was previously logged even when the query raised.
        CommonFuncs.log('query of jobs in db successful')
    except Exception:
        CommonFuncs.log(self, 'query unsuccessful', level='debug')
def run(self):
    '''Main applier loop: log in, pull unprocessed links from the db, apply to
    each, and start the site webcrawler when the link queue runs dry.'''
    self.isRunning()
    self.set_error(False)  # reset error
    Bot_Class = eval(self.site_bot_name)
    site_name = self.site_bot_name.split('_Bot')[0]
    spider_name = '_' + site_name.lower() + '_' + 'webcrawler.py'
    cached_username = ''
    cached_password = ''
    logged_in = False
    # APPLY LOOP
    bot = CommonFuncs.get_bot(self.site_bot_name)
    new_links = ['']
    with CommonFuncs.get_driver(visible=WEB_DRIVERS_VISIBLE, headless=WEB_DRIVERS_HEADLESS) as driver:
        bot_inst = Bot_Class(driver)
        while bot.is_running and len(new_links) > 0:
            # Re-login whenever the stored username or password changed.
            if cached_username != bot.username or cached_password != bot.password:
                cached_username = bot.username
                cached_password = bot.password
                logged_in = bot_inst.login(bot)
            if logged_in:
                # Pop unprocessed links until one not already in the Job table is found.
                with CommonFuncs.get_db() as db:
                    try:
                        new_to_db = False
                        while not new_to_db:
                            unprocessed_job = db.query(UnprocessedJob).filter(
                                UnprocessedJob.bot_type == self.site_bot_name).all()
                            new_link = unprocessed_job[0].job
                            db.delete(unprocessed_job[0])
                            db.commit()
                            db_results = db.query(Job).filter(Job.link_to_job == new_link).all()
                            if db_results is None or db_results == []:
                                new_to_db = True
                    except:
                        new_link = None  # queue empty or db error
                if new_link is not None:
                    CommonFuncs.log(self, 'attempting to apply to: ' + new_link)
                    new_job = bot_inst.apply(new_link)  # goto page and apply
                    # Only add the job to database if it is a Job instance.
                    if new_job != False and isinstance(new_job, Job):
                        with CommonFuncs.get_db() as db:  # save job object to db
                            try:
                                db.add(new_job)
                                db.commit()
                            except Exception as e:
                                print(e)
                else:
                    CommonFuncs.log('applier taking a timeout as it waits for more job links')
                    Jobbybot.run_bot_job_link_webcrawler(spider_name=spider_name)  # start the webcrawler for this bot
                    # Wait for more results, checking the bot is still running.
                    for _ in range(5):
                        if CommonFuncs.is_bot_running(self.site_bot_name):
                            sleep(1)
                        else:
                            break
            bot = CommonFuncs.get_bot(self.site_bot_name)
            sleep(0.1)
    self.isFinished()
def apply(self, job):
    '''Apply to the Indeed posting at url `job` via its easy-apply modal.

    Returns a populated Job object (applied flag set on success), or None
    when no job profile exists. The caller commits the Job to the db.'''
    job_profile = None
    with CommonFuncs.get_db() as db:  # if no job profile, return
        try:
            job_profile = db.query(JobProfile).one()
        except Exception:
            return
    # NAVIGATE TO JOB PAGE
    self.driver.get(job)
    # CREATE JOB OBJECT
    new_job = Job()
    new_job.app_date = datetime.now()
    new_job.link_to_job = job
    try:
        new_job.job_title = self.driver.find_element(By.CLASS_NAME, 'jobtitle').text
        new_job.company = self.driver.find_element(By.CLASS_NAME, 'company').text
        new_job.location = self.driver.find_element(By.CLASS_NAME, 'location').text
    except Exception:
        pass  # page layout differs; leave these fields unset
    new_job.job_site = CommonFuncs.fetch_domain_name(job)
    new_job.applied = False  # BUG FIX: was 'new_job.appled' (typo), leaving 'applied' unset
    try:
        self.driver.implicitly_wait(1)
        try:
            self.driver.find_element_by_class_name('indeed-apply-button').click()
        except Exception:
            return new_job  # posting has no easy-apply button
        CommonFuncs.switch_frames(self.driver, 'iframe[name$=modal-iframe]')
        # RESUME UPLOAD
        try:
            # NOTE(review): profile fields hold stringified lists; eval() of
            # db-stored text is unsafe if the db is untrusted — consider
            # ast.literal_eval.
            resume_file = eval(job_profile.resume)[0]
            self.driver.implicitly_wait(2)
            resume_file = resume_file.replace('/', '//')
            self.driver.find_element_by_id('resume').send_keys(resume_file)
        except Exception:
            return new_job
        # UNIQUE (optional) COVER LETTER
        try:
            cover_letter_text = CommonFuncs.extract_text(eval(job_profile.cover_letter)[0])
            # BUG FIX: placeholders were read via job['company'] / job['jobtitle'],
            # but `job` is a url string, so indexing always raised and the
            # placeholders were never substituted. Use the scraped fields.
            if '{{company_name}}' in cover_letter_text and new_job.company:
                cover_letter_text = cover_letter_text.replace('{{company_name}}', new_job.company)
            if '{{job_title}}' in cover_letter_text and new_job.job_title:
                cover_letter_text = cover_letter_text.replace('{{job_title}}', new_job.job_title)
            cl_box = self.driver.find_element_by_tag_name('textarea')
            cl_box.clear()
            self.driver.implicitly_wait(1)
            for c in cover_letter_text:  # send characters one at a time (otherwise some are consumed)
                cl_box.send_keys(c)
        except Exception:
            pass
        # SUPPORTING DOCUMENTATION - if requested and available from user
        try:
            isdoc = 1
            for sdoc in eval(job_profile.supporting_docs):
                self.driver.find_element_by_id('multattach' + str(isdoc)).send_keys(sdoc)
                isdoc += 1
        except Exception:
            pass
        # FILL OUT OTHER QUESTIONS & SUBMIT
        try:
            # FILL IN FULL NAME
            try:
                self.driver.find_element_by_id('applicant.name').send_keys(
                    job_profile.applicant_name)
            except Exception:
                pass
            # FIRST NAME
            try:
                self.driver.find_element_by_id('applicant.firstName').\
                    send_keys(job_profile.applicant_name.split(' ')[0])
            except Exception:
                pass
            # LAST NAME
            try:
                self.driver.find_element_by_id('applicant.lastName').\
                    send_keys(job_profile.applicant_name.split(' ')[1])
            except Exception:
                pass
            # PHONE NUMBER
            try:
                self.driver.find_element_by_id('applicant.phoneNumber').\
                    send_keys(job_profile.phone_number)
            except Exception:
                pass
            # ADDRESS
            try:
                self.driver.find_element_by_xpath('//*[@data-prefill-id="q_basic_0_street_address"]')\
                    .send_keys(job_profile.your_address)
            except Exception:
                pass
            # CITY
            try:
                self.driver.find_element_by_xpath('//*[@data-prefill-id="q_basic_0_city"]')\
                    .send_keys(job_profile.your_city)
            except Exception:
                pass
            # QUESTION AND ANSWER
            self.driver.find_element_by_id('apply').click()
            self.driver.implicitly_wait(1)
            new_job.applied = True
        except (NoSuchElementException, ElementNotVisibleException):
            # catch event where there is no continue; submit directly
            self.driver.find_element_by_id('apply').click()
            self.driver.implicitly_wait(1)
            new_job.applied = True
        finally:
            self.driver.switch_to.window(self.driver.window_handles[0])
    except (NoSuchElementException, ElementNotVisibleException):
        new_job.applied = False  # BUG FIX: was 'new_job.appled' (typo)
    return new_job
def parse(self, response):
    '''Harvest easy-apply job links from an Indeed results page, queue the new
    ones as UnprocessedJobs, and follow pagination while the bot is running.'''
    # Stop once a backlog of 100+ unprocessed Indeed jobs exists.
    with CommonFuncs.get_db() as db:
        backlog = db.query(UnprocessedJob).filter(UnprocessedJob.bot_type == 'Indeed_Bot').all()
        if len(backlog) >= 100:
            return
    this_url = response._url
    try:
        searching_by = dict(parse_qsl(urlsplit(this_url).query))
        print('searching by: ' + str(searching_by))
    except:
        pass
    # CommonFuncs.log('starting parsing job page for IndeedWebcrawler: ' + response.url)
    # COLLECT NEW JOB LINKS FROM SITE
    job_cards = response.xpath("//div[@data-tn-component='organicJob']")
    new_count = 0
    for card in job_cards:
        bot = CommonFuncs.get_bot('Indeed_Bot')
        if not bot.is_running:
            return  # exit if the bot is not running
        if 'Easily apply' not in card.extract():
            continue  # only easy-apply postings are queued
        job_link = JOB_SITE_LINKS['Indeed']['job_site_base'] + card.xpath('h2/a/@href').extract()[0]
        with CommonFuncs.get_db() as db:
            db_results = db.query(Job).filter(Job.link_to_job == job_link).all()
            if db_results is None or db_results == []:
                new_count += 1
                try:
                    with CommonFuncs.get_db() as db:
                        u_job = UnprocessedJob()
                        u_job.bot_type = 'Indeed_Bot'
                        u_job.job = job_link
                        db.add(u_job)
                        db.commit()
                except:
                    pass
    # CommonFuncs.log('%s new jobs found on page %s' % (new_count, response.url))
    if new_count > 0:
        print('%s new jobs found on page' % new_count)
    ##########
    # JUMP TO NEXT PAGE WHILE THE BOT IS STILL RUNNING
    ##########
    for link in response.xpath("//div[@class='pagination']/a").extract():
        if 'Next' in link:
            bot = CommonFuncs.get_bot('Indeed_Bot')
            if bot.is_running:  # verify that the bot is running before continuing to the next page
                # CommonFuncs.log('finished parsing job page for IndeedWebcrawler: ' + this_url)
                next_href = bs(link, 'lxml').body.find('a').get('href')
                full_link = JOB_SITE_LINKS['Indeed']['job_site_base'] + next_href
                yield scrapy.Request(url=full_link, callback=self.parse)
            else:
                return


# if __name__ == '__main__':
#     runner = CrawlerRunner()
#     runner.crawl(IndeedWebcrawler)
#     d = runner.join()
#     d.addBoth(lambda _: reactor.stop())
#     reactor.run()
def parse(self, response):
    '''Harvest quick-apply job links from a Ziprecruiter results page, queue
    the new ones as UnprocessedJobs, and follow pagination while running.'''
    # Stop once a backlog of 100+ unprocessed Ziprecruiter jobs exists.
    with CommonFuncs.get_db() as db:
        backlog = db.query(UnprocessedJob).filter(
            UnprocessedJob.bot_type == 'Ziprecruiter_Bot').all()
        if len(backlog) >= 100:
            return
    # EXTRACT JOB LINKS ON THE PAGE AND COMMIT TO DB
    this_url = response._url
    try:
        searching_by = dict(parse_qsl(urlsplit(this_url).query))
        print('searching by: ' + str(searching_by))
    except:
        pass
    # CommonFuncs.log('starting parsing job page for ZiprecruiterWebcrawler: ' + response.url)
    new_jobs = None
    try:
        # @data-tracking='quick_apply'
        new_jobs = response.xpath(
            "//div[@class='job_results']/article/div[@class='job_tools']/"
            + "button[@data-tracking='quick_apply']"
            + "/ancestor::article"
            + "/div[@class='job_content']/a/@href").extract()
    except:
        # CommonFuncs.log('could not find jobs on the page: ' + this_url)
        pass
    new_count = 0
    if new_jobs is not None:  # if no results found return
        for job_link in new_jobs:  # dump the job links to the db
            with CommonFuncs.get_db() as db:
                db_results = db.query(Job).filter(
                    Job.link_to_job == job_link).all()
            if db_results is None or db_results == []:
                try:
                    with CommonFuncs.get_db() as db:
                        u_job = UnprocessedJob()
                        u_job.bot_type = 'Ziprecruiter_Bot'
                        u_job.job = job_link
                        db.add(u_job)
                        db.commit()
                    new_count += 1
                except:
                    # CommonFuncs.log('something went wrong in ZiprecruiterWebcrawler trying to commit job link: %s' % job_link, level='debug')
                    pass
    # CommonFuncs.log('%s new jobs found on page %s' % (new_count, response._url) )
    if new_count > 0:
        print('%s new jobs found on page' % new_count)
    ##########
    # JUMP TO NEXT PAGE WHILE THE BOT IS STILL RUNNING
    ##########
    try:
        results_div = response.xpath("//div[@class='job_results']")
        data_next_url = results_div[0].root.attrib['data-next-url']
        if len(data_next_url) > 0:
            url = JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + data_next_url
            bot = CommonFuncs.get_bot('Ziprecruiter_Bot')
            # CommonFuncs.log('finished parsing job page for ZiprecruiterWebcrawler: ' + this_url)
            if bot.is_running:  # verify that the bot is running before continuing to the next page
                yield scrapy.Request(url=url, callback=self.parse)
            else:
                return
    except:
        pass


# if __name__ == '__main__':
#     runner = CrawlerRunner()
#     runner.crawl(ZiprecruiterLoginWebcrawler(username='******', password='******'))
#     d = runner.join()
#     d.addBoth(lambda _: reactor.stop())
#     reactor.run()
def start_requests(self):
    '''Return iterable of scrapy Requests over Ziprecruiter query urls, plus
    the site's title- and company-filter links for each query.'''
    # Stop once a backlog of 100+ unprocessed Ziprecruiter jobs exists.
    with CommonFuncs.get_db() as db:
        backlog = db.query(UnprocessedJob).filter(
            UnprocessedJob.bot_type == 'Ziprecruiter_Bot').all()
        if len(backlog) >= 100:
            return
    start_time = datetime.now()
    job_profile = CommonFuncs.get_job_profile()
    locations = CommonFuncs.get_locations_list(job_profile)
    query_list = CommonFuncs.build_query_string(job_profile=job_profile,
                                                or_delim='',
                                                bracket1='',
                                                bracket2='',
                                                adv_supp=False)
    if len(query_list) == 0:
        return
    ##########
    # URL ENCODE EACH QUERY
    ##########
    start_urls = []
    for location in locations:
        for query_string in query_list:
            bot = CommonFuncs.get_bot('Ziprecruiter_Bot')
            if not bot.is_running:  # stop building urls once the bot is paused
                return
            encoded_query = urllib.parse.urlencode(
                {'search': query_string, 'location': location}, safe='')
            job_url = JOB_SITE_LINKS['Ziprecruiter']['query'] + '&' + encoded_query
            start_urls.append(job_url)
            page = html.fromstring(requests.get(job_url).content)
            # append all of the links from filtering by job title
            title_links = page.xpath(
                "//menu[@class='select-menu-submenu t_filter_dropdown_titles']/a/@href")
            start_urls += [JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + i for i in title_links]
            # append all of the links from filtering by company
            company_links = page.xpath(
                "//menu[@class='select-menu-submenu t_filter_dropdown_companies']/a/@href")
            start_urls += [JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + i for i in company_links]
    msg = 'time spent building start_urls for Ziprecruiter: ' + str(datetime.now() - start_time)
    # CommonFuncs.log( msg )
    print(msg)
    ##########
    # GET URL RESPONSES AND CALL PARSE FUNCTION TO ITERATE OVER PAGES
    ##########
    print('TOTAL START URLs: ' + str(len(start_urls)))
    for i, url in enumerate(start_urls, start=1):
        print('LINK#: ' + str(i) + ' WORKING ON NEW START URL: ' + url)
        yield scrapy.Request(url=url, callback=self.parse)
def initialize_gui(self):
    '''Initialize numerous gui settings before launching the application.'''
    # JOB SITE ACCOUNT SECTION
    self.setup_job_site_account_section()
    # JOB PROFILE TABLE
    jobprofile = None
    try:
        with CommonFuncs.get_db() as db:
            jobprofile = db.query(JobProfile).one()
    except Exception:
        pass  # no stored profile yet; show an empty one
    if not jobprofile:
        jobprofile = JobProfile()
    job_profile_fields = JobProfile.__table__.columns.keys()
    self.ui.jobprofile_table.doubleClicked.connect(self.file_select)
    self.ui.jobprofile_table.setColumnCount(1)
    self.ui.jobprofile_table.setRowCount(len(job_profile_fields) - 1)
    self.ui.jobprofile_table.setEditTriggers(QAbstractItemView.DoubleClicked | QAbstractItemView.SelectedClicked)
    header = self.ui.jobprofile_table.horizontalHeader()
    header.setSectionResizeMode(QHeaderView.Stretch)
    # JOB PROFILE TABLE HEADERS
    i = 0
    for field in job_profile_fields:  # build headers
        if not field == 'id':  # don't show the id
            self.ui.jobprofile_table.setVerticalHeaderItem(i, QTableWidgetItem(field))
        else:
            i -= 1
        i += 1
    # LOAD JOB PROFILE FROM DB
    i = -1  # when setting table widget values, the index starts at -1
    for field in job_profile_fields:
        if not field == 'id':  # don't show the id
            self.ui.jobprofile_table.setItem(i, 1, QTableWidgetItem(getattr(jobprofile, field)))
        else:
            i -= 1  # return to previous row, to avoid skipping a row's header
        i += 1
    CommonFuncs.log(self, 'finished loading job profile from db')
    self.ui.jobprofile_table.setHorizontalHeaderItem(0, QTableWidgetItem('values'))
    self.ui.jobprofile_table.cellChanged.connect(self.job_profile_table_edited)
    # JOB PROFILE SEARCH RESULTS
    self.set_progress_bar_gif(self.ui.matchupload_lbl, static_key='hourglass_big')
    self.ui.matchupload_lbl.setStyleSheet('background-color: rgb(225,225,225)')
    self.ui.matchupload_lbl.setAlignment(Qt.AlignCenter)
    self.ui.matchupload_lbl.hide()
    self.ui.matchup_table.setColumnCount(1)
    self.ui.matchup_table.setHorizontalHeaderItem(0, QTableWidgetItem('Top Results'))
    header = self.ui.matchup_table.horizontalHeader()
    header.setSectionResizeMode(QHeaderView.Stretch)
    # DELETE ON JOB PROFILE EDIT -- this must be initialized after table edit function run
    with CommonFuncs.get_db() as db:
        settings = None  # BUG FIX: was unbound (NameError) when the query below failed
        try:
            settings = db.query(JobbybotSettings).one()
        except Exception:
            pass
        if settings and settings.delete_ujobs_on_jprofile_edit == True:
            self.ui.delete_ujobs_on_jprofile_edit_check.setChecked(True)
    # SEARCH INTERFACE SETUP
    self.ui.search_btn.setIcon(QIcon(STATIC_FILES['search']))
    self.ui.search_btn.setIconSize(QSize(28, 28))
    # STATS THREADS
    self.threads['stats'] = StatsUpdateThread()
    self.threads['stats'].stats.connect(self.update_stats)
    # SEARCH TABLE AND BOX
    self.ui.tabs.setCurrentIndex(0)  # select jobs tab
    self.ui.search_btn.clicked.connect(self.search_table_btn_clicked)
    self.ui.search_box.returnPressed.connect(self.search_table_btn_clicked)
    self.threads['database_tables'] = DatabaseTablesThread()
    self.threads['database_tables'].update_tables.connect(self.search_table_btn_clicked)
    self.search_table_btn_clicked()
def apply(self, job):
    '''Apply to the job at url `job` and store the result in a Job object.

    Returns the populated Job (applied flag set on submit success), or None
    when no job profile exists.'''
    job_profile = None
    with CommonFuncs.get_db() as db:  # if no job profile, return
        try:
            job_profile = db.query(JobProfile).one()
        except Exception:
            return
    self.driver.get(job)  # navigate to job page
    # build job object
    new_job = Job()
    new_job.app_date = datetime.now()
    new_job.link_to_job = job
    new_job.job_title = self.driver.find_element_by_xpath(
        '//*[@id="frame"]/div[4]/div/div/div/div[2]/div/div[1]/div[1]/div[1]/div/div[2]/h1').text
    new_job.job_site = CommonFuncs.fetch_domain_name(self.driver.current_url)
    new_job.applied = False  # default
    # GET EMPLOYER NAME
    try:
        new_job.company = self.driver.find_element_by_class_name(
            'job_emp_details').find_element_by_tag_name('a').text
    except Exception:
        pass
    # GET LOCATION
    try:
        for datum in self.driver.find_elements_by_class_name('job-bfields'):
            if "location" in datum.find_element_by_class_name('label').text.lower():
                new_job.location = datum.find_element_by_class_name('widget').text
                break
    except Exception:
        pass
    # GET CONTACT INFO - if available
    try:
        contact_block = self.driver.find_element_by_id('sb_contactinfo')
        contents = contact_block.text.split('\n')
        new_job.contact_name = contents[1]
        for item in contents:
            if "@" in item:
                new_job.contact_email = item
            elif "http" in item:
                new_job.company_site = item
    except Exception:
        pass
    # OPEN APPLICATION FORM
    try:
        self.driver.find_element_by_id('job_send_docs').click()
        # make doc selections (unused 'docs_used'/'supporting_docs' lists removed)
        doc_fields = ['resume', 'cover_letter', 'writing_sample', 'transcript']
        for doc_field in doc_fields:
            try:
                select = Select(self.driver.find_element_by_name(
                    "dnf_class_values[non_ocr_job_resume][%s]" % doc_field))
                if doc_field == "resume":
                    select.select_by_index(0)
                else:
                    select.select_by_index(1)  # select the first available cover letter
            except Exception:
                pass
        # SUBMIT APPLICATION
        try:
            self.driver.find_element_by_xpath(
                "//*[@id='job_resume_form']").find_element_by_name(
                "dnf_opt_submit").click()
            new_job.applied = True
        except Exception:
            new_job.applied = False  # BUG FIX: was 'new_job.appled' (typo)
    except Exception:
        new_job.applied = False  # BUG FIX: was 'new_job.appled' (typo)
    return new_job
def __init__(self):
    '''Reset bot state in the db, ensure a settings row exists, build the Qt
    main window, and run the application event loop.'''
    self.init_process = True  # some processes in functions disabled during initialization
    if os.path.isfile(LOG_FILE_PATH):
        os.remove(LOG_FILE_PATH)  # delete the log from the last session
    CommonFuncs.log(self, 'Jobbybot session started')
    self.user_settings = None  # store login creds, job profile, etc
    self.threads = THREADS_DICT
    # RESET ALL BOTS TO NOT IS_RUNNING
    for j_site in JOB_SITE_LINKS:
        site_bot_name = j_site + '_Bot'
        with CommonFuncs.get_db() as db:
            try:
                bot = CommonFuncs.get_bot(site_bot_name)
                bot.is_running = False
            except:
                bot = JobSiteAccount()
                bot.is_running = False
                bot.site_bot_name = site_bot_name
                db.add(bot)
                db.commit()
        CommonFuncs.log(self, 'reset %s to not running in db' % site_bot_name)
    # CHECK FOR SETTINGS OBJECT - create if it does not exist
    settings = None
    with CommonFuncs.get_db() as db:
        try:
            settings = db.query(JobbybotSettings).one()
        except:
            pass
        if not settings:
            new_settings = JobbybotSettings()
            new_settings.connect_to_gsheets = False
            new_settings.delete_ujobs_on_jprofile_edit = True
            db.add(new_settings)
            db.commit()  # add settings object to database
    # START GUI SETUP
    app = QApplication(sys.argv)
    self.MainWindow = QtWidgets.QMainWindow()
    self.ui = Ui_MainWindow()
    self.ui.setupUi(self.MainWindow)
    QApplication.setStyle(QStyleFactory.create('Fusion'))
    self.MainWindow.setWindowIcon(QIcon(STATIC_FILES['logo']))
    self.MainWindow.setGeometry(0, 60, 778, 629)
    self.initialize_gui()
    CommonFuncs.log(self, 'finished initializing gui')
    CommonFuncs.log(self, 'Launching Jobbybot!')
    self.threads['stats'].start()
    self.threads['database_tables'].start()
    # OPEN AND RUN THE GUI
    self.init_process = False
    self.MainWindow.show()
    self.job_profile_table_edited()  # initial population of the results for the job profile
    sys.exit(app.exec_())
def get_new_link(self, desired_result_count=1, need_new_results=True):
    '''Apply filters from the job_profile and return a new job link.

    With desired_result_count == 1 a single url string is returned; otherwise
    a list of up to desired_result_count urls ([] when none are found).'''
    job_profile = None
    with CommonFuncs.get_db() as db:  # if no job profile, return an empty link
        try:
            job_profile = db.query(JobProfile).one()
        except:
            return ''
    self.driver.get(JOB_SITE_LINKS['job_site'])  # navigate to the job site
    self.driver.find_element_by_link_text('Advanced Search').click()  # open the filter settings
    # Open the ajax "more filters" form expander.
    anchors = self.driver.find_element_by_class_name(
        "content-container-inner").find_elements_by_tag_name('a')
    for anchor in anchors:
        try:
            if str(anchor.text).lower() == "more filters":
                anchor.click()
                break
        except:
            pass
    # FILL IN ALL COMBOBOXES AND LISTBOXES
    combobox_filters = {
        "//*[@id='advsearch_ocr_']": 'job_types',
        "//*[@id='jobfilters_industry___']": 'industries',
        "//*[@id='jobfilters_job_type___']": 'job_types',
        "//*[@id='advsearch_job_custom_field_2___']": 'terms_of_employment',
        "//*[@id='advsearch_multi_state___']": 'states',
        "//*[@id='advsearch_multi_country___']": 'countries',
        "//*[@id='advsearch_work_authorization___']": 'work_authorizations',
    }
    for xpath_key, profile_attr in combobox_filters.items():
        CommonFuncs.combo_select(
            combobox=self.driver.find_element_by_xpath(xpath_key),
            visible_text_list=getattr(job_profile, profile_attr))
    # FILL IN ZIP CODE AND RADIUS IF APPLICABLE
    zip_box = self.driver.find_element_by_xpath(
        "//*[@id='jobfilters_distance_search__base_']")
    radius_box = self.driver.find_element_by_xpath(
        "//*[@id='jobfilters_distance_search__distance_']")
    if len(job_profile.zip_code) > 0:
        try:
            zip_box.send_keys(job_profile.zip_code)
            radius_box.send_keys(job_profile.radius)
        except:
            pass
    # EXCLUDE JOBS ALREADY APPLIED TO
    self.driver.find_element_by_name("jobfilters[exclude_applied_jobs]").click()  # exclude jobs applied for
    # ONLY INCLUDE JOBS IN THE SELECTED MAJOR
    self.driver.find_element_by_name("advsearch[major_ignore_all_pick]").click()  # include only selected major
    # APPLY FILTERS
    self.driver.find_element_by_xpath(
        r'//*[@id="frame"]/div[4]/div/div/div[2]/div[1]/div/form/div/div[2]/span[1]/input[1]'
    ).click()  # apply filters
    # SUBMIT KEYWORDS
    try:
        search_box = self.driver.find_element_by_name("jobfilters[keywords]")
        search_box.clear()
        sleep(3)
        search_box.send_keys(job_profile.keywords)
        # CLICK SEARCH BUTTON
        self.driver.find_element_by_xpath(
            "//*[@id='frame']/div[4]/div/div/div[2]/div[1]/div/form/div/div[1]/input[2]"
        ).click()
    except WebDriverException:
        pass
    # FIND A NEW LINK NOT YET PROCESSED BY ANY BOT THAT MATCHES THE FILTER RESULTS
    try:
        soup = BeautifulSoup(self.driver.page_source)
        listing_container = soup.find('div', {'id': 'student_job_list_content'})
        listed_jobs = listing_container.find('ul').findAll('li')
        pages = self.driver.find_element_by_name('_pager').find_elements_by_tag_name('option')
    except AttributeError:
        return []
    new_jobs_list = []
    for page in pages:
        try:
            soup = BeautifulSoup(self.driver.page_source)  # get the page source html
        except:
            return []
        try:
            listing_container = soup.find('div', {'id': 'student_job_list_content'})
            listed_jobs = listing_container.find('ul').findAll('li')
        except:
            pass
        if listed_jobs:
            for listing in listed_jobs:
                for link in listing.findAll('a'):  # find all links in list item
                    # urls with this encoding goto the page for that job
                    if '?mode' not in link['href']:
                        continue
                    if need_new_results:
                        with CommonFuncs.get_db() as db:
                            db_matches = db.query(Job).filter(
                                Job.link_to_job.contains(link['href'])).all()
                    else:
                        db_matches = []
                    if not db_matches:
                        # This job has not been processed by any bot yet.
                        if desired_result_count == 1:
                            return JOB_SITE_LINKS['job_site_base'] + link['href']
                        elif len(new_jobs_list) < desired_result_count:
                            new_jobs_list.append(
                                JOB_SITE_LINKS['job_site_base'] + link['href'])
                        else:
                            return new_jobs_list
        try:
            self.driver.find_element_by_link_text('Next').click()
        except (NoSuchElementException, AttributeError):
            pass
    return []  # if no new links