def job_profile_table_edited(self):
    '''Commit the edited job profile to the database and kick off a search
    thread that refreshes results for the new profile version.'''
    # Load the existing profile, or start a fresh one if none is stored yet.
    jobprofile = None
    try:
        with CommonFuncs.get_db() as db:
            jobprofile = db.query(JobProfile).one()
    except Exception:
        pass  # no profile row yet; a fresh one is created below
    if not jobprofile:
        jobprofile = JobProfile()

    # Copy each visible table cell onto the matching JobProfile column.
    job_profile_fields = JobProfile.__table__.columns.keys()
    i = 0
    for field in job_profile_fields:
        if not field == 'id':  # skip the id value, which is autoincremented
            try:
                cell_text = self.ui.jobprofile_table.item(i, 0).text()
                setattr(jobprofile, field, cell_text)
            except Exception:
                pass  # empty/missing cell; keep the existing value
        else:
            i -= 1  # reset the index, so we don't skip a row
        i += 1

    with CommonFuncs.get_db() as db:
        db.add(jobprofile)
        db.commit()

    # If the user has checked that they want to delete unprocessed jobs on
    # job profile edit, delete them (skipped during app initialization).
    if not self.init_process:
        with CommonFuncs.get_db() as db:
            settings = None  # BUG FIX: was unbound (NameError) when the query failed
            try:
                settings = db.query(JobbybotSettings).one()
            except Exception:
                pass
            if settings and settings.delete_ujobs_on_jprofile_edit == True:
                db.query(UnprocessedJob).delete(synchronize_session=False)
                db.commit()

    CommonFuncs.log(self, 'committed update to job profile for user')
    CommonFuncs.log(self, 'starting query thread to find jobs related to their profile')
    self.threads['job_profile_search_results'] = JobProfileResultsThread()
    self.threads['job_profile_search_results'].started.connect(self.job_profile_search_results_started)
    self.threads['job_profile_search_results'].finished.connect(self.job_profile_search_results_finished)
    self.threads['job_profile_search_results'].start()
def file_select(self):
    '''Upload a file path to the user's job profile in the db on click in cell.'''
    CommonFuncs.log(self, 'attempting to update a user job file')
    for selected in self.ui.jobprofile_table.selectedItems():
        current_row = selected.row()
        field_name = self.ui.jobprofile_table.verticalHeaderItem(selected.row()).text()
        # Only document-type fields open a file picker.
        dialog = None
        if field_name in ('resume', 'cover_letter'):
            dialog = QFileDialog()
            dialog.setFileMode(QFileDialog.AnyFile)
        elif field_name == 'supporting_docs':
            dialog = QFileDialog()
            dialog.setFileMode(QFileDialog.ExistingFiles)
        if dialog and dialog.exec_():
            chosen_files = dialog.selectedFiles()
            with CommonFuncs.get_db() as db:
                profile = db.query(JobProfile).one()
                setattr(profile, field_name, str(chosen_files))
                db.add(profile)
                db.commit()
                CommonFuncs.log(self, 'successfully committed doc to job profile: %s' % str(chosen_files))
            # NOTE(review): writes to row-1 / column 1 — mirrors how the table is
            # populated elsewhere in this file; confirm the offset is intended.
            self.ui.jobprofile_table.setItem(current_row - 1, 1, QTableWidgetItem(str(chosen_files)))
        break  # only the first selected cell is processed
def play_selected_bot_thread(self):
    '''Run the bot selected from the dropdown menu.'''
    job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
    CommonFuncs.log(self, 'attempting to play selected: %s' % job_site_bot_name)
    # Mark the bot as running, creating its account row if it doesn't exist.
    with CommonFuncs.get_db() as db:
        try:
            bot = CommonFuncs.get_bot(job_site_bot_name)
            bot.is_running = True
        except:
            bot = JobSiteAccount()
            bot.site_bot_name = job_site_bot_name
            bot.is_running = True
            db.add(bot)
            db.commit()
    # Refuse to start without stored login credentials.
    jobsiteaccount = CommonFuncs.get_bot(job_site_bot_name)
    if jobsiteaccount.username is None or jobsiteaccount.password is None:
        CommonFuncs.log(self, 'no valid login creds available')
        CommonFuncs.log(self, 'playing of bot canceled')
        return
    applier = bot_threads[job_site_bot_name]['applier']
    if applier is None or not applier.isRunning():
        # Only build the thread if one doesn't already exist / isn't running.
        new_thread = BotThread(job_site_bot_name)
        bot_threads[job_site_bot_name]['applier'] = new_thread
        new_thread.started.connect(self.bot_thread_started)
        new_thread.finished.connect(self.bot_thread_finished)
        new_thread.start()
        CommonFuncs.log(self, 'playing of %s successful!' % job_site_bot_name)
    else:
        CommonFuncs.log(self, 'playing of %s unsuccessful!' % job_site_bot_name)
def start_requests(self):
    '''Return iterable of scrapy Requests over Indeed query urls.'''
    # Stop crawling once a backlog of 100+ unprocessed Indeed jobs exists.
    with CommonFuncs.get_db() as db:
        backlog = db.query(UnprocessedJob).filter(UnprocessedJob.bot_type == 'Indeed_Bot').all()
        if len(backlog) >= 100:
            return
    start_time = datetime.now()
    job_profile = CommonFuncs.get_job_profile()
    locations = CommonFuncs.get_locations_list(job_profile)
    query_list = CommonFuncs.build_query_string(job_profile=job_profile,
                                                or_delim='or',
                                                bracket1='(',
                                                bracket2=')',
                                                adv_supp=True)
    query_string = query_list[0]
    if len(query_string) == 0:
        return
    ##########
    # URL ENCODE EACH QUERY (one url per location)
    ##########
    start_urls = []
    for location in locations:
        encoded_query = urllib.parse.urlencode({'q': query_string, 'l': location}, safe='')
        start_urls.append(JOB_SITE_LINKS['Indeed']['query'] + '&' + encoded_query)
    # CommonFuncs.log('time spent building start_urls for Indeed: ' + str(datetime.now() - start_time))
    ##########
    # GET URL RESPONSES AND CALL PARSE FUNCTION TO ITERATE OVER PAGES
    ##########
    for url in start_urls:
        yield scrapy.Request(url=url, callback=self.parse)
def delete_selected_job_site(self):
    '''After user confirmation, delete the selected site's account creds and
    any unprocessed jobs queued for it, then refresh the account section.'''
    msg = QMessageBox()  # confirmation dialog
    msg.setIcon(QMessageBox.Critical)
    msg.setText("Your login creds and unprocessed jobs will be deleted for this site.")
    msg.setInformativeText("Are you sure you want to continue?")
    msg.setWindowTitle("Warning About Deletion: Irreversible")
    msg.setStandardButtons(QMessageBox.Ok | QMessageBox.Cancel)
    if msg.exec() != QMessageBox.Ok:
        return
    self.ui.deleteload_lbl.show()
    job_site = self.ui.jobsite_select.currentText() + '_Bot'
    with CommonFuncs.get_db() as db:
        # DELETE ACCOUNT
        account = CommonFuncs.get_bot(job_site)
        if account is not None:
            db.delete(account)
            db.commit()
            CommonFuncs.log(self, 'successfully deleted account for: ' + job_site)
        # DELETE ANY UNPROCESSED JOBS
        db.query(UnprocessedJob).filter(UnprocessedJob.bot_type == job_site).delete(synchronize_session=False)
        db.commit()
        CommonFuncs.log(self, 'successfully deleted all unprocessed jobs for account: ' + job_site)
    self.job_site_account_select()  # refresh job site account section of gui
    self.ui.deleteload_lbl.hide()
def job_site_account_select(self):
    '''Load the user's account creds and job stats for the selected site.'''
    job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
    CommonFuncs.log(self, 'starting to find the account creds and stats for the user after job site account select')
    todo_count = 0
    applied_count = 0
    try:
        with CommonFuncs.get_db() as db:
            todo_count = len(db.query(UnprocessedJob).filter(
                UnprocessedJob.bot_type == job_site_bot_name).all())
            applied_count = len(db.query(Job).filter(and_(
                Job.job_site == JOB_SITE_LINKS[self.ui.jobsite_select.currentText()]['job_site'],
                Job.applied == True)).all())
    except:
        pass  # leave both counts at zero when the queries fail
    self.ui.todoforsite_btn.setText(str(todo_count))
    self.ui.appliedforsite_btn.setText(str(applied_count))
    account = CommonFuncs.get_bot(job_site_bot_name)
    if not account:
        # No stored creds: blank out the boxes.
        self.ui.jobsiteusername_box.setText('')
        self.ui.jobsitepassword_box.setText('')
    else:
        self.ui.jobsiteusername_box.setText(account.username)
        self.ui.jobsitepassword_box.setText(account.password)
        if account.is_running:
            self.ui.playload_lbl.show()
        else:
            self.ui.playload_lbl.hide()
    self.ui.jobsiteusername_box.setStyleSheet('background-color: white')
    self.ui.jobsitepassword_box.setStyleSheet('background-color: white')
    self.ui.verify_btn.setIcon(QtGui.QIcon(STATIC_FILES['checked']))
    CommonFuncs.log(self, 'finished finding the account creds and stats for the user after job site account select')
def run(self):
    '''Poll the Job table forever and emit update_tables whenever its contents change.'''
    self.isRunning()
    self.set_error(False)  # reset error
    cached_results = []
    while True:
        latest_results = []
        with CommonFuncs.get_db() as db:
            jobs = db.query(Job).all()
            if jobs is not None:
                latest_results += jobs
        # Only signal the gui when the table contents actually changed.
        if cached_results != latest_results:
            cached_results = latest_results
            self.update_tables.emit()
        sleep(QTHREAD_SLEEP_TIMES['database_tables'])
def pause_selected_bot_thread(self):
    '''Flag the selected site's bot as not running so its applier loop stops.'''
    job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
    CommonFuncs.log(self, 'attempting to send pause signal to bot: %s' % job_site_bot_name)
    with CommonFuncs.get_db() as db:
        try:
            bot = CommonFuncs.get_bot(job_site_bot_name)
            if bot.is_running:
                self.ui.pauseload_lbl.show()  # only show loading gif if there is bot to pause
                bot.is_running = False
                db.add(bot)
                db.commit()
        except:
            CommonFuncs.log(self, 'problem sending pause signal for bot: %s' % job_site_bot_name, level='debug')
    CommonFuncs.log(self, 'pause signal for %s successfully sent' % job_site_bot_name)
def verify_job_site_account_thread_finished(self):
    '''Re-enable the creds widgets, then either warn about a failed login or
    commit the newly verified creds to the database.'''
    CommonFuncs.log(self, 'completed verification process of account creds')
    self.ui.jobsiteusername_box.setEnabled(True)
    self.ui.jobsitepassword_box.setEnabled(True)
    self.ui.jobsite_select.setEnabled(True)
    self.ui.jobsiteaccountcancel_btn.setEnabled(True)
    self.ui.verifyload_lbl.hide()
    self.ui.jobsiteaccountcancel_btn.setIcon(QIcon(STATIC_FILES['revert']))
    if self.threads['verify_job_site_account'].error == True:
        # Login failed: mark the boxes red and tell the user.
        self.ui.jobsiteusername_box.setStyleSheet('background-color: rgb(247, 126, 74)')
        self.ui.jobsitepassword_box.setStyleSheet('background-color: rgb(247, 126, 74)')
        self.ui.verify_btn.setIcon(QtGui.QIcon(STATIC_FILES['submit']))  # show the site creds need to be verified
        msg = QMessageBox()  # show error message
        msg.setIcon(QMessageBox.Critical)
        msg.setText("Job Site Account Verification Failed")
        msg.setInformativeText("Please correct your username and password and try again.")
        msg.setWindowTitle("Job Site Login Failed")
        msg.setStandardButtons(QMessageBox.Ok)
        msg.exec()
        return
    # COMMIT THE JOB SITE ACCOUNT CREDS
    bot_name = str(self.ui.jobsite_select.currentText()) + '_Bot'
    account = None
    try:
        account = CommonFuncs.get_bot(bot_name)
    except:
        pass
    if not account:
        account = JobSiteAccount()
        account.site_bot_name = bot_name
    account.username = self.ui.jobsiteusername_box.text()
    account.password = self.ui.jobsitepassword_box.text()
    with CommonFuncs.get_db() as db:
        db.add(account)
        db.commit()
    CommonFuncs.log(self, 'successfully stored valid account creds')
    self.ui.verify_btn.setIcon(QtGui.QIcon(STATIC_FILES['checked']))  # show the site creds have been verified
    self.ui.jobsiteusername_box.setStyleSheet('background-color: rgb(70, 188, 128)')
    self.ui.jobsitepassword_box.setStyleSheet('background-color: rgb(70, 188, 128)')
def run(self):
    '''Poll the db for job/application counts and emit them as a stringified
    dict on the `stats` signal, forever.'''
    self.isRunning()
    self.set_error(False)  # reset error
    while True:
        stats_dict = {}
        with CommonFuncs.get_db() as db:
            # Overall counts; each falls back to 0 when its query fails.
            try:
                processed = len(db.query(Job).all())
            except Exception:
                processed = 0
            stats_dict['processed'] = str(processed)
            try:
                applied = len(db.query(Job).filter(Job.applied == True).all())
            except Exception:
                applied = 0
            stats_dict['applied'] = str(applied)
            try:
                todo = len(db.query(UnprocessedJob).all())
            except Exception:
                todo = 0
            stats_dict['todo'] = str(todo)
            # Per-site todo/applied counts.
            for j_site in JOB_SITE_LINKS:
                try:
                    bot_name = j_site + '_Bot'
                    try:
                        todo_count = len(db.query(UnprocessedJob).filter(
                            UnprocessedJob.bot_type == bot_name).all())
                        applied_count = len(db.query(Job).filter(and_(
                            Job.job_site == JOB_SITE_LINKS[j_site]['job_site'],
                            Job.applied == True)).all())
                    except Exception:
                        todo_count = 0
                        applied_count = 0
                    stats_dict[bot_name + '_todo'] = str(todo_count)
                    stats_dict[bot_name + '_applied'] = str(applied_count)
                except Exception:
                    pass
        # BUG FIX: was `stats_dict = str(stats_dict)` followed by
        # `emit(str(stats_dict))` — a redundant double str() conversion.
        self.stats.emit(str(stats_dict))
        sleep(QTHREAD_SLEEP_TIMES['stats'])
def search_table_btn_clicked(self):
    '''Run a substring query of jobs in the db and render the results,
    one column per matching Job, in the jobs table.'''
    # NOTE(review): this log call omits `self` while others pass it —
    # confirm CommonFuncs.log accepts both call shapes.
    CommonFuncs.log('running query on jobs in db')
    try:
        search_string = self.ui.search_box.text()
        results = {'jobs': []}
        self.ui.jobs_table.clear()
        job_fields = Job.__table__.columns.keys()
        # Match the search string against every Job column.
        with CommonFuncs.get_db() as db:
            for field in job_fields:
                field_results = db.query(Job).filter(
                    getattr(Job, field).contains(search_string)).all()
                if field_results:
                    results['jobs'] += field_results
        results['jobs'] = list(set(results['jobs']))  # remove duplicates
        self.ui.jobs_table.setRowCount(len(job_fields))
        # BUG FIX: the stretch mode was applied to jobprofile_table's vertical
        # header while populating jobs_table; use the jobs table header here.
        header = self.ui.jobs_table.verticalHeader()
        header.setSectionResizeMode(QHeaderView.Stretch)
        for i, field in enumerate(job_fields):  # build headers
            self.ui.jobs_table.setVerticalHeaderItem(i, QTableWidgetItem(field))
        results['jobs'].sort(key=lambda x: x.app_date and x.link_to_job, reverse=True)  # sort by app_date
        if results['jobs']:
            self.ui.jobs_table.setColumnCount(len(results['jobs']))
            for col, result in enumerate(results['jobs']):
                for row, field in enumerate(job_fields):
                    cell_val = getattr(result, field)
                    if cell_val is None:
                        cell_val = ''
                    self.ui.jobs_table.setItem(row, col, QTableWidgetItem(str(cell_val)))
        else:
            self.ui.jobs_table.setColumnCount(0)
        # BUG FIX: success was previously logged even when the query raised.
        CommonFuncs.log('query of jobs in db successful')
    except Exception:
        CommonFuncs.log(self, 'query unsuccessful', level='debug')
def run(self):
    '''Main applier loop: log in, pull unprocessed links from the db, apply to
    each, and start the site webcrawler when the link queue runs dry.'''
    self.isRunning()
    self.set_error(False)  # reset error
    Bot_Class = eval(self.site_bot_name)
    site_name = self.site_bot_name.split('_Bot')[0]
    spider_name = '_' + site_name.lower() + '_' + 'webcrawler.py'
    cached_username = ''
    cached_password = ''
    logged_in = False
    # APPLY LOOP
    bot = CommonFuncs.get_bot(self.site_bot_name)
    new_links = ['']
    with CommonFuncs.get_driver(visible=WEB_DRIVERS_VISIBLE, headless=WEB_DRIVERS_HEADLESS) as driver:
        bot_inst = Bot_Class(driver)
        while bot.is_running and len(new_links) > 0:
            # Re-login whenever the stored username or password changed.
            if cached_username != bot.username or cached_password != bot.password:
                cached_username = bot.username
                cached_password = bot.password
                logged_in = bot_inst.login(bot)
            if logged_in:
                # Pop unprocessed links until one not already in the Job table is found.
                with CommonFuncs.get_db() as db:
                    try:
                        new_to_db = False
                        while not new_to_db:
                            unprocessed_job = db.query(UnprocessedJob).filter(
                                UnprocessedJob.bot_type == self.site_bot_name).all()
                            new_link = unprocessed_job[0].job
                            db.delete(unprocessed_job[0])
                            db.commit()
                            db_results = db.query(Job).filter(Job.link_to_job == new_link).all()
                            if db_results is None or db_results == []:
                                new_to_db = True
                    except:
                        new_link = None  # queue empty or db error
                if new_link is not None:
                    CommonFuncs.log(self, 'attempting to apply to: ' + new_link)
                    new_job = bot_inst.apply(new_link)  # goto page and apply
                    # Only add the job to database if it is a Job instance.
                    if new_job != False and isinstance(new_job, Job):
                        with CommonFuncs.get_db() as db:  # save job object to db
                            try:
                                db.add(new_job)
                                db.commit()
                            except Exception as e:
                                print(e)
                else:
                    CommonFuncs.log('applier taking a timeout as it waits for more job links')
                    Jobbybot.run_bot_job_link_webcrawler(spider_name=spider_name)  # start the webcrawler for this bot
                    # Wait for more results, checking the bot is still running.
                    for _ in range(5):
                        if CommonFuncs.is_bot_running(self.site_bot_name):
                            sleep(1)
                        else:
                            break
            bot = CommonFuncs.get_bot(self.site_bot_name)
            sleep(0.1)
    self.isFinished()
def apply(self, job):
    '''Apply to the Indeed posting at url `job` via its easy-apply modal.

    Returns a populated Job object (applied flag set on success), or None
    when no job profile exists. The caller commits the Job to the db.'''
    job_profile = None
    with CommonFuncs.get_db() as db:  # if no job profile, return
        try:
            job_profile = db.query(JobProfile).one()
        except Exception:
            return
    # NAVIGATE TO JOB PAGE
    self.driver.get(job)
    # CREATE JOB OBJECT
    new_job = Job()
    new_job.app_date = datetime.now()
    new_job.link_to_job = job
    try:
        new_job.job_title = self.driver.find_element(By.CLASS_NAME, 'jobtitle').text
        new_job.company = self.driver.find_element(By.CLASS_NAME, 'company').text
        new_job.location = self.driver.find_element(By.CLASS_NAME, 'location').text
    except Exception:
        pass  # page layout differs; leave these fields unset
    new_job.job_site = CommonFuncs.fetch_domain_name(job)
    new_job.applied = False  # BUG FIX: was 'new_job.appled' (typo), leaving 'applied' unset
    try:
        self.driver.implicitly_wait(1)
        try:
            self.driver.find_element_by_class_name('indeed-apply-button').click()
        except Exception:
            return new_job  # posting has no easy-apply button
        CommonFuncs.switch_frames(self.driver, 'iframe[name$=modal-iframe]')
        # RESUME UPLOAD
        try:
            # NOTE(review): profile fields hold stringified lists; eval() of
            # db-stored text is unsafe if the db is untrusted — consider
            # ast.literal_eval.
            resume_file = eval(job_profile.resume)[0]
            self.driver.implicitly_wait(2)
            resume_file = resume_file.replace('/', '//')
            self.driver.find_element_by_id('resume').send_keys(resume_file)
        except Exception:
            return new_job
        # UNIQUE (optional) COVER LETTER
        try:
            cover_letter_text = CommonFuncs.extract_text(eval(job_profile.cover_letter)[0])
            # BUG FIX: placeholders were read via job['company'] / job['jobtitle'],
            # but `job` is a url string, so indexing always raised and the
            # placeholders were never substituted. Use the scraped fields.
            if '{{company_name}}' in cover_letter_text and new_job.company:
                cover_letter_text = cover_letter_text.replace('{{company_name}}', new_job.company)
            if '{{job_title}}' in cover_letter_text and new_job.job_title:
                cover_letter_text = cover_letter_text.replace('{{job_title}}', new_job.job_title)
            cl_box = self.driver.find_element_by_tag_name('textarea')
            cl_box.clear()
            self.driver.implicitly_wait(1)
            for c in cover_letter_text:  # send characters one at a time (otherwise some are consumed)
                cl_box.send_keys(c)
        except Exception:
            pass
        # SUPPORTING DOCUMENTATION - if requested and available from user
        try:
            isdoc = 1
            for sdoc in eval(job_profile.supporting_docs):
                self.driver.find_element_by_id('multattach' + str(isdoc)).send_keys(sdoc)
                isdoc += 1
        except Exception:
            pass
        # FILL OUT OTHER QUESTIONS & SUBMIT
        try:
            # FILL IN FULL NAME
            try:
                self.driver.find_element_by_id('applicant.name').send_keys(
                    job_profile.applicant_name)
            except Exception:
                pass
            # FIRST NAME
            try:
                self.driver.find_element_by_id('applicant.firstName').\
                    send_keys(job_profile.applicant_name.split(' ')[0])
            except Exception:
                pass
            # LAST NAME
            try:
                self.driver.find_element_by_id('applicant.lastName').\
                    send_keys(job_profile.applicant_name.split(' ')[1])
            except Exception:
                pass
            # PHONE NUMBER
            try:
                self.driver.find_element_by_id('applicant.phoneNumber').\
                    send_keys(job_profile.phone_number)
            except Exception:
                pass
            # ADDRESS
            try:
                self.driver.find_element_by_xpath('//*[@data-prefill-id="q_basic_0_street_address"]')\
                    .send_keys(job_profile.your_address)
            except Exception:
                pass
            # CITY
            try:
                self.driver.find_element_by_xpath('//*[@data-prefill-id="q_basic_0_city"]')\
                    .send_keys(job_profile.your_city)
            except Exception:
                pass
            # QUESTION AND ANSWER
            self.driver.find_element_by_id('apply').click()
            self.driver.implicitly_wait(1)
            new_job.applied = True
        except (NoSuchElementException, ElementNotVisibleException):
            # catch event where there is no continue; submit directly
            self.driver.find_element_by_id('apply').click()
            self.driver.implicitly_wait(1)
            new_job.applied = True
        finally:
            self.driver.switch_to.window(self.driver.window_handles[0])
    except (NoSuchElementException, ElementNotVisibleException):
        new_job.applied = False  # BUG FIX: was 'new_job.appled' (typo)
    return new_job
def parse(self, response):
    '''Harvest easy-apply job links from an Indeed results page, queue the new
    ones as UnprocessedJobs, and follow pagination while the bot is running.'''
    # Stop once a backlog of 100+ unprocessed Indeed jobs exists.
    with CommonFuncs.get_db() as db:
        backlog = db.query(UnprocessedJob).filter(UnprocessedJob.bot_type == 'Indeed_Bot').all()
        if len(backlog) >= 100:
            return
    this_url = response._url
    try:
        searching_by = dict(parse_qsl(urlsplit(this_url).query))
        print('searching by: ' + str(searching_by))
    except:
        pass
    # CommonFuncs.log('starting parsing job page for IndeedWebcrawler: ' + response.url)
    # COLLECT NEW JOB LINKS FROM SITE
    job_cards = response.xpath("//div[@data-tn-component='organicJob']")
    new_count = 0
    for card in job_cards:
        bot = CommonFuncs.get_bot('Indeed_Bot')
        if not bot.is_running:
            return  # exit if the bot is not running
        if 'Easily apply' not in card.extract():
            continue  # only easy-apply postings are queued
        job_link = JOB_SITE_LINKS['Indeed']['job_site_base'] + card.xpath('h2/a/@href').extract()[0]
        with CommonFuncs.get_db() as db:
            db_results = db.query(Job).filter(Job.link_to_job == job_link).all()
            if db_results is None or db_results == []:
                new_count += 1
                try:
                    with CommonFuncs.get_db() as db:
                        u_job = UnprocessedJob()
                        u_job.bot_type = 'Indeed_Bot'
                        u_job.job = job_link
                        db.add(u_job)
                        db.commit()
                except:
                    pass
    # CommonFuncs.log('%s new jobs found on page %s' % (new_count, response.url))
    if new_count > 0:
        print('%s new jobs found on page' % new_count)
    ##########
    # JUMP TO NEXT PAGE WHILE THE BOT IS STILL RUNNING
    ##########
    for link in response.xpath("//div[@class='pagination']/a").extract():
        if 'Next' in link:
            bot = CommonFuncs.get_bot('Indeed_Bot')
            if bot.is_running:  # verify that the bot is running before continuing to the next page
                # CommonFuncs.log('finished parsing job page for IndeedWebcrawler: ' + this_url)
                next_href = bs(link, 'lxml').body.find('a').get('href')
                full_link = JOB_SITE_LINKS['Indeed']['job_site_base'] + next_href
                yield scrapy.Request(url=full_link, callback=self.parse)
            else:
                return


# if __name__ == '__main__':
#     runner = CrawlerRunner()
#     runner.crawl(IndeedWebcrawler)
#     d = runner.join()
#     d.addBoth(lambda _: reactor.stop())
#     reactor.run()
def parse(self, response):
    '''Harvest quick-apply job links from a Ziprecruiter results page, queue
    the new ones as UnprocessedJobs, and follow pagination while running.'''
    # Stop once a backlog of 100+ unprocessed Ziprecruiter jobs exists.
    with CommonFuncs.get_db() as db:
        backlog = db.query(UnprocessedJob).filter(
            UnprocessedJob.bot_type == 'Ziprecruiter_Bot').all()
        if len(backlog) >= 100:
            return
    # EXTRACT JOB LINKS ON THE PAGE AND COMMIT TO DB
    this_url = response._url
    try:
        searching_by = dict(parse_qsl(urlsplit(this_url).query))
        print('searching by: ' + str(searching_by))
    except:
        pass
    # CommonFuncs.log('starting parsing job page for ZiprecruiterWebcrawler: ' + response.url)
    new_jobs = None
    try:
        # @data-tracking='quick_apply'
        new_jobs = response.xpath(
            "//div[@class='job_results']/article/div[@class='job_tools']/"
            + "button[@data-tracking='quick_apply']"
            + "/ancestor::article"
            + "/div[@class='job_content']/a/@href").extract()
    except:
        # CommonFuncs.log('could not find jobs on the page: ' + this_url)
        pass
    new_count = 0
    if new_jobs is not None:  # if no results found return
        for job_link in new_jobs:  # dump the job links to the db
            with CommonFuncs.get_db() as db:
                db_results = db.query(Job).filter(
                    Job.link_to_job == job_link).all()
            if db_results is None or db_results == []:
                try:
                    with CommonFuncs.get_db() as db:
                        u_job = UnprocessedJob()
                        u_job.bot_type = 'Ziprecruiter_Bot'
                        u_job.job = job_link
                        db.add(u_job)
                        db.commit()
                    new_count += 1
                except:
                    # CommonFuncs.log('something went wrong in ZiprecruiterWebcrawler trying to commit job link: %s' % job_link, level='debug')
                    pass
    # CommonFuncs.log('%s new jobs found on page %s' % (new_count, response._url) )
    if new_count > 0:
        print('%s new jobs found on page' % new_count)
    ##########
    # JUMP TO NEXT PAGE WHILE THE BOT IS STILL RUNNING
    ##########
    try:
        results_div = response.xpath("//div[@class='job_results']")
        data_next_url = results_div[0].root.attrib['data-next-url']
        if len(data_next_url) > 0:
            url = JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + data_next_url
            bot = CommonFuncs.get_bot('Ziprecruiter_Bot')
            # CommonFuncs.log('finished parsing job page for ZiprecruiterWebcrawler: ' + this_url)
            if bot.is_running:  # verify that the bot is running before continuing to the next page
                yield scrapy.Request(url=url, callback=self.parse)
            else:
                return
    except:
        pass


# if __name__ == '__main__':
#     runner = CrawlerRunner()
#     runner.crawl(ZiprecruiterLoginWebcrawler(username='******', password='******'))
#     d = runner.join()
#     d.addBoth(lambda _: reactor.stop())
#     reactor.run()
def start_requests(self):
    '''Return iterable of scrapy Requests over Ziprecruiter query urls, plus
    the site's title- and company-filter links for each query.'''
    # Stop once a backlog of 100+ unprocessed Ziprecruiter jobs exists.
    with CommonFuncs.get_db() as db:
        backlog = db.query(UnprocessedJob).filter(
            UnprocessedJob.bot_type == 'Ziprecruiter_Bot').all()
        if len(backlog) >= 100:
            return
    start_time = datetime.now()
    job_profile = CommonFuncs.get_job_profile()
    locations = CommonFuncs.get_locations_list(job_profile)
    query_list = CommonFuncs.build_query_string(job_profile=job_profile,
                                                or_delim='',
                                                bracket1='',
                                                bracket2='',
                                                adv_supp=False)
    if len(query_list) == 0:
        return
    ##########
    # URL ENCODE EACH QUERY
    ##########
    start_urls = []
    for location in locations:
        for query_string in query_list:
            bot = CommonFuncs.get_bot('Ziprecruiter_Bot')
            if not bot.is_running:  # stop building urls once the bot is paused
                return
            encoded_query = urllib.parse.urlencode(
                {'search': query_string, 'location': location}, safe='')
            job_url = JOB_SITE_LINKS['Ziprecruiter']['query'] + '&' + encoded_query
            start_urls.append(job_url)
            page = html.fromstring(requests.get(job_url).content)
            # append all of the links from filtering by job title
            title_links = page.xpath(
                "//menu[@class='select-menu-submenu t_filter_dropdown_titles']/a/@href")
            start_urls += [JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + i for i in title_links]
            # append all of the links from filtering by company
            company_links = page.xpath(
                "//menu[@class='select-menu-submenu t_filter_dropdown_companies']/a/@href")
            start_urls += [JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + i for i in company_links]
    msg = 'time spent building start_urls for Ziprecruiter: ' + str(datetime.now() - start_time)
    # CommonFuncs.log( msg )
    print(msg)
    ##########
    # GET URL RESPONSES AND CALL PARSE FUNCTION TO ITERATE OVER PAGES
    ##########
    print('TOTAL START URLs: ' + str(len(start_urls)))
    for i, url in enumerate(start_urls, start=1):
        print('LINK#: ' + str(i) + ' WORKING ON NEW START URL: ' + url)
        yield scrapy.Request(url=url, callback=self.parse)
def initialize_gui(self):
    '''Initialize numerous gui settings before launching the application.'''
    # JOB SITE ACCOUNT SECTION
    self.setup_job_site_account_section()
    # JOB PROFILE TABLE
    jobprofile = None
    try:
        with CommonFuncs.get_db() as db:
            jobprofile = db.query(JobProfile).one()
    except Exception:
        pass  # no stored profile yet; show an empty one
    if not jobprofile:
        jobprofile = JobProfile()
    job_profile_fields = JobProfile.__table__.columns.keys()
    self.ui.jobprofile_table.doubleClicked.connect(self.file_select)
    self.ui.jobprofile_table.setColumnCount(1)
    self.ui.jobprofile_table.setRowCount(len(job_profile_fields) - 1)
    self.ui.jobprofile_table.setEditTriggers(QAbstractItemView.DoubleClicked | QAbstractItemView.SelectedClicked)
    header = self.ui.jobprofile_table.horizontalHeader()
    header.setSectionResizeMode(QHeaderView.Stretch)
    # JOB PROFILE TABLE HEADERS
    i = 0
    for field in job_profile_fields:  # build headers
        if not field == 'id':  # don't show the id
            self.ui.jobprofile_table.setVerticalHeaderItem(i, QTableWidgetItem(field))
        else:
            i -= 1
        i += 1
    # LOAD JOB PROFILE FROM DB
    i = -1  # when setting table widget values, the index starts at -1
    for field in job_profile_fields:
        if not field == 'id':  # don't show the id
            self.ui.jobprofile_table.setItem(i, 1, QTableWidgetItem(getattr(jobprofile, field)))
        else:
            i -= 1  # return to previous row, to avoid skipping a row's header
        i += 1
    CommonFuncs.log(self, 'finished loading job profile from db')
    self.ui.jobprofile_table.setHorizontalHeaderItem(0, QTableWidgetItem('values'))
    self.ui.jobprofile_table.cellChanged.connect(self.job_profile_table_edited)
    # JOB PROFILE SEARCH RESULTS
    self.set_progress_bar_gif(self.ui.matchupload_lbl, static_key='hourglass_big')
    self.ui.matchupload_lbl.setStyleSheet('background-color: rgb(225,225,225)')
    self.ui.matchupload_lbl.setAlignment(Qt.AlignCenter)
    self.ui.matchupload_lbl.hide()
    self.ui.matchup_table.setColumnCount(1)
    self.ui.matchup_table.setHorizontalHeaderItem(0, QTableWidgetItem('Top Results'))
    header = self.ui.matchup_table.horizontalHeader()
    header.setSectionResizeMode(QHeaderView.Stretch)
    # DELETE ON JOB PROFILE EDIT -- this must be initialized after table edit function run
    with CommonFuncs.get_db() as db:
        settings = None  # BUG FIX: was unbound (NameError) when the query below failed
        try:
            settings = db.query(JobbybotSettings).one()
        except Exception:
            pass
        if settings and settings.delete_ujobs_on_jprofile_edit == True:
            self.ui.delete_ujobs_on_jprofile_edit_check.setChecked(True)
    # SEARCH INTERFACE SETUP
    self.ui.search_btn.setIcon(QIcon(STATIC_FILES['search']))
    self.ui.search_btn.setIconSize(QSize(28, 28))
    # STATS THREADS
    self.threads['stats'] = StatsUpdateThread()
    self.threads['stats'].stats.connect(self.update_stats)
    # SEARCH TABLE AND BOX
    self.ui.tabs.setCurrentIndex(0)  # select jobs tab
    self.ui.search_btn.clicked.connect(self.search_table_btn_clicked)
    self.ui.search_box.returnPressed.connect(self.search_table_btn_clicked)
    self.threads['database_tables'] = DatabaseTablesThread()
    self.threads['database_tables'].update_tables.connect(self.search_table_btn_clicked)
    self.search_table_btn_clicked()
def apply(self, job):
    '''Apply to the job at url `job` and store the result in a Job object.

    Returns the populated Job (applied flag set on submit success), or None
    when no job profile exists.'''
    job_profile = None
    with CommonFuncs.get_db() as db:  # if no job profile, return
        try:
            job_profile = db.query(JobProfile).one()
        except Exception:
            return
    self.driver.get(job)  # navigate to job page
    # build job object
    new_job = Job()
    new_job.app_date = datetime.now()
    new_job.link_to_job = job
    new_job.job_title = self.driver.find_element_by_xpath(
        '//*[@id="frame"]/div[4]/div/div/div/div[2]/div/div[1]/div[1]/div[1]/div/div[2]/h1').text
    new_job.job_site = CommonFuncs.fetch_domain_name(self.driver.current_url)
    new_job.applied = False  # default
    # GET EMPLOYER NAME
    try:
        new_job.company = self.driver.find_element_by_class_name(
            'job_emp_details').find_element_by_tag_name('a').text
    except Exception:
        pass
    # GET LOCATION
    try:
        for datum in self.driver.find_elements_by_class_name('job-bfields'):
            if "location" in datum.find_element_by_class_name('label').text.lower():
                new_job.location = datum.find_element_by_class_name('widget').text
                break
    except Exception:
        pass
    # GET CONTACT INFO - if available
    try:
        contact_block = self.driver.find_element_by_id('sb_contactinfo')
        contents = contact_block.text.split('\n')
        new_job.contact_name = contents[1]
        for item in contents:
            if "@" in item:
                new_job.contact_email = item
            elif "http" in item:
                new_job.company_site = item
    except Exception:
        pass
    # OPEN APPLICATION FORM
    try:
        self.driver.find_element_by_id('job_send_docs').click()
        # make doc selections (unused 'docs_used'/'supporting_docs' lists removed)
        doc_fields = ['resume', 'cover_letter', 'writing_sample', 'transcript']
        for doc_field in doc_fields:
            try:
                select = Select(self.driver.find_element_by_name(
                    "dnf_class_values[non_ocr_job_resume][%s]" % doc_field))
                if doc_field == "resume":
                    select.select_by_index(0)
                else:
                    select.select_by_index(1)  # select the first available cover letter
            except Exception:
                pass
        # SUBMIT APPLICATION
        try:
            self.driver.find_element_by_xpath(
                "//*[@id='job_resume_form']").find_element_by_name(
                "dnf_opt_submit").click()
            new_job.applied = True
        except Exception:
            new_job.applied = False  # BUG FIX: was 'new_job.appled' (typo)
    except Exception:
        new_job.applied = False  # BUG FIX: was 'new_job.appled' (typo)
    return new_job
def __init__(self):
    '''Reset bot state in the db, ensure a settings row exists, build the Qt
    main window, and run the application event loop.'''
    self.init_process = True  # some processes in functions disabled during initialization
    if os.path.isfile(LOG_FILE_PATH):
        os.remove(LOG_FILE_PATH)  # delete the log from the last session
    CommonFuncs.log(self, 'Jobbybot session started')
    self.user_settings = None  # store login creds, job profile, etc
    self.threads = THREADS_DICT
    # RESET ALL BOTS TO NOT IS_RUNNING
    for j_site in JOB_SITE_LINKS:
        site_bot_name = j_site + '_Bot'
        with CommonFuncs.get_db() as db:
            try:
                bot = CommonFuncs.get_bot(site_bot_name)
                bot.is_running = False
            except:
                bot = JobSiteAccount()
                bot.is_running = False
                bot.site_bot_name = site_bot_name
                db.add(bot)
                db.commit()
        CommonFuncs.log(self, 'reset %s to not running in db' % site_bot_name)
    # CHECK FOR SETTINGS OBJECT - create if it does not exist
    settings = None
    with CommonFuncs.get_db() as db:
        try:
            settings = db.query(JobbybotSettings).one()
        except:
            pass
        if not settings:
            new_settings = JobbybotSettings()
            new_settings.connect_to_gsheets = False
            new_settings.delete_ujobs_on_jprofile_edit = True
            db.add(new_settings)
            db.commit()  # add settings object to database
    # START GUI SETUP
    app = QApplication(sys.argv)
    self.MainWindow = QtWidgets.QMainWindow()
    self.ui = Ui_MainWindow()
    self.ui.setupUi(self.MainWindow)
    QApplication.setStyle(QStyleFactory.create('Fusion'))
    self.MainWindow.setWindowIcon(QIcon(STATIC_FILES['logo']))
    self.MainWindow.setGeometry(0, 60, 778, 629)
    self.initialize_gui()
    CommonFuncs.log(self, 'finished initializing gui')
    CommonFuncs.log(self, 'Launching Jobbybot!')
    self.threads['stats'].start()
    self.threads['database_tables'].start()
    # OPEN AND RUN THE GUI
    self.init_process = False
    self.MainWindow.show()
    self.job_profile_table_edited()  # initial population of the results for the job profile
    sys.exit(app.exec_())
def get_new_link(self, desired_result_count=1, need_new_results=True):
    '''Apply filters from the job_profile and return a new job link.

    With desired_result_count == 1 a single url string is returned; otherwise
    a list of up to desired_result_count urls ([] when none are found).'''
    job_profile = None
    with CommonFuncs.get_db() as db:  # if no job profile, return an empty link
        try:
            job_profile = db.query(JobProfile).one()
        except:
            return ''
    self.driver.get(JOB_SITE_LINKS['job_site'])  # navigate to the job site
    self.driver.find_element_by_link_text('Advanced Search').click()  # open the filter settings
    # Open the ajax "more filters" form expander.
    anchors = self.driver.find_element_by_class_name(
        "content-container-inner").find_elements_by_tag_name('a')
    for anchor in anchors:
        try:
            if str(anchor.text).lower() == "more filters":
                anchor.click()
                break
        except:
            pass
    # FILL IN ALL COMBOBOXES AND LISTBOXES
    combobox_filters = {
        "//*[@id='advsearch_ocr_']": 'job_types',
        "//*[@id='jobfilters_industry___']": 'industries',
        "//*[@id='jobfilters_job_type___']": 'job_types',
        "//*[@id='advsearch_job_custom_field_2___']": 'terms_of_employment',
        "//*[@id='advsearch_multi_state___']": 'states',
        "//*[@id='advsearch_multi_country___']": 'countries',
        "//*[@id='advsearch_work_authorization___']": 'work_authorizations',
    }
    for xpath_key, profile_attr in combobox_filters.items():
        CommonFuncs.combo_select(
            combobox=self.driver.find_element_by_xpath(xpath_key),
            visible_text_list=getattr(job_profile, profile_attr))
    # FILL IN ZIP CODE AND RADIUS IF APPLICABLE
    zip_box = self.driver.find_element_by_xpath(
        "//*[@id='jobfilters_distance_search__base_']")
    radius_box = self.driver.find_element_by_xpath(
        "//*[@id='jobfilters_distance_search__distance_']")
    if len(job_profile.zip_code) > 0:
        try:
            zip_box.send_keys(job_profile.zip_code)
            radius_box.send_keys(job_profile.radius)
        except:
            pass
    # EXCLUDE JOBS ALREADY APPLIED TO
    self.driver.find_element_by_name("jobfilters[exclude_applied_jobs]").click()  # exclude jobs applied for
    # ONLY INCLUDE JOBS IN THE SELECTED MAJOR
    self.driver.find_element_by_name("advsearch[major_ignore_all_pick]").click()  # include only selected major
    # APPLY FILTERS
    self.driver.find_element_by_xpath(
        r'//*[@id="frame"]/div[4]/div/div/div[2]/div[1]/div/form/div/div[2]/span[1]/input[1]'
    ).click()  # apply filters
    # SUBMIT KEYWORDS
    try:
        search_box = self.driver.find_element_by_name("jobfilters[keywords]")
        search_box.clear()
        sleep(3)
        search_box.send_keys(job_profile.keywords)
        # CLICK SEARCH BUTTON
        self.driver.find_element_by_xpath(
            "//*[@id='frame']/div[4]/div/div/div[2]/div[1]/div/form/div/div[1]/input[2]"
        ).click()
    except WebDriverException:
        pass
    # FIND A NEW LINK NOT YET PROCESSED BY ANY BOT THAT MATCHES THE FILTER RESULTS
    try:
        soup = BeautifulSoup(self.driver.page_source)
        listing_container = soup.find('div', {'id': 'student_job_list_content'})
        listed_jobs = listing_container.find('ul').findAll('li')
        pages = self.driver.find_element_by_name('_pager').find_elements_by_tag_name('option')
    except AttributeError:
        return []
    new_jobs_list = []
    for page in pages:
        try:
            soup = BeautifulSoup(self.driver.page_source)  # get the page source html
        except:
            return []
        try:
            listing_container = soup.find('div', {'id': 'student_job_list_content'})
            listed_jobs = listing_container.find('ul').findAll('li')
        except:
            pass
        if listed_jobs:
            for listing in listed_jobs:
                for link in listing.findAll('a'):  # find all links in list item
                    # urls with this encoding goto the page for that job
                    if '?mode' not in link['href']:
                        continue
                    if need_new_results:
                        with CommonFuncs.get_db() as db:
                            db_matches = db.query(Job).filter(
                                Job.link_to_job.contains(link['href'])).all()
                    else:
                        db_matches = []
                    if not db_matches:
                        # This job has not been processed by any bot yet.
                        if desired_result_count == 1:
                            return JOB_SITE_LINKS['job_site_base'] + link['href']
                        elif len(new_jobs_list) < desired_result_count:
                            new_jobs_list.append(
                                JOB_SITE_LINKS['job_site_base'] + link['href'])
                        else:
                            return new_jobs_list
        try:
            self.driver.find_element_by_link_text('Next').click()
        except (NoSuchElementException, AttributeError):
            pass
    return []  # if no new links