Example #1
    def play_selected_bot_thread(self):
        '''run the bot selected from the dropdown menu.'''

        job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
        CommonFuncs.log(self, 'attempting to play selected: %s' % job_site_bot_name)
        with CommonFuncs.get_db() as db:
            try:
                bot = CommonFuncs.get_bot(job_site_bot_name)
                bot.is_running = True
            except Exception:   # no account row yet for this site: create one
                bot = JobSiteAccount()
                bot.site_bot_name = job_site_bot_name
                bot.is_running = True
            db.add(bot)
            db.commit()

        jobsiteaccount = CommonFuncs.get_bot(job_site_bot_name)
        if jobsiteaccount.username is None or jobsiteaccount.password is None:
            CommonFuncs.log(self, 'no valid login creds available')
            CommonFuncs.log(self, 'playing of bot canceled')
            return

        if bot_threads[job_site_bot_name]['applier'] is None or not bot_threads[job_site_bot_name]['applier'].isRunning():
            bot_threads[job_site_bot_name]['applier'] = BotThread(job_site_bot_name)  # only build the thread if one isn't already running
            bot_threads[job_site_bot_name]['applier'].started.connect(self.bot_thread_started)
            bot_threads[job_site_bot_name]['applier'].finished.connect(self.bot_thread_finished)
            bot_threads[job_site_bot_name]['applier'].start()

            CommonFuncs.log(self, 'playing of %s successful!' % job_site_bot_name)
        else:
            CommonFuncs.log(self, 'playing of %s unsuccessful: bot thread is already running' % job_site_bot_name)
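
# Sketch (assumed, not from the source): the BotThread constructed above.
# Example #1 builds BotThread(job_site_bot_name) and connects its
# started/finished signals. Only the class name and constructor argument
# come from the snippets; the attributes and placeholder run() body are
# assumptions (the real apply loop appears in Example #7).
from PyQt5.QtCore import QThread

class BotThread(QThread):
    def __init__(self, site_bot_name, parent=None):
        super().__init__(parent)
        self.site_bot_name = site_bot_name  # e.g. 'Indeed_Bot'
        self.error = False

    def set_error(self, value):
        self.error = value  # Example #7 calls set_error(False) at the start of a run

    def run(self):
        pass  # placeholder: see Example #7 for the real apply loop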
Example #2
    def job_site_account_select(self):
        '''load the user's account creds and stats for the selected site.'''
        job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
        CommonFuncs.log(self, 'loading account creds and stats for the selected job site')
        todo_count = 0
        applied_count = 0
        try:
            with CommonFuncs.get_db() as db:
                todo_count = len(db.query(UnprocessedJob).filter(UnprocessedJob.bot_type == job_site_bot_name).all())
                applied_count = len(db.query(Job).filter(
                    and_(
                        Job.job_site == JOB_SITE_LINKS[self.ui.jobsite_select.currentText()]['job_site'],
                        Job.applied == True
                    )).all())
        except Exception:
            pass
        self.ui.todoforsite_btn.setText(str(todo_count))
        self.ui.appliedforsite_btn.setText(str(applied_count))
        jobsiteaccount = CommonFuncs.get_bot(job_site_bot_name)
        if not jobsiteaccount:
            self.ui.jobsiteusername_box.setText('')
            self.ui.jobsitepassword_box.setText('')
        else:
            self.ui.jobsiteusername_box.setText(jobsiteaccount.username or '')  # creds may still be None; setText(None) raises TypeError
            self.ui.jobsitepassword_box.setText(jobsiteaccount.password or '')
            if jobsiteaccount.is_running:
                self.ui.playload_lbl.show()
            else:
                self.ui.playload_lbl.hide()
        self.ui.jobsiteusername_box.setStyleSheet('background-color: white')
        self.ui.jobsitepassword_box.setStyleSheet('background-color: white')
        self.ui.verify_btn.setIcon(QtGui.QIcon(STATIC_FILES['checked']))
        CommonFuncs.log(self, 'finished loading account creds and stats for the selected job site')
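
# Sketch (assumed, not from the source): the get_db helper used throughout.
# Every snippet opens sessions with "with CommonFuncs.get_db() as db". A
# minimal sketch assuming it is a contextmanager yielding a SQLAlchemy
# session; the engine URL is a placeholder.
from contextlib import contextmanager
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite:///jobbybot.db')  # placeholder database URL
Session = sessionmaker(bind=engine)

@contextmanager
def get_db():
    db = Session()
    try:
        yield db
    finally:
        db.close()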
Example #3
    def delete_selected_job_site(self):
        msg = QMessageBox()  # confirm before the irreversible deletion
        msg.setIcon(QMessageBox.Critical)
        msg.setText("Your login creds and unprocessed jobs will be deleted for this site.")
        msg.setInformativeText("Are you sure you want to continue?")
        msg.setWindowTitle("Warning About Deletion: Irreversible")
        msg.setStandardButtons(QMessageBox.Ok | QMessageBox.Cancel)
        reply = msg.exec()

        if reply == QMessageBox.Ok:
            self.ui.deleteload_lbl.show()
            job_site = self.ui.jobsite_select.currentText() + '_Bot'
            with CommonFuncs.get_db() as db:
                # DELETE ACCOUNT
                jobsiteaccount = CommonFuncs.get_bot(job_site)
                if jobsiteaccount is not None:
                    db.delete(jobsiteaccount)
                    db.commit()
                    CommonFuncs.log(self, 'successfully deleted account for: ' + job_site)
                # DELETE ANY UNPROCESSED JOBS
                db.query(UnprocessedJob).filter(UnprocessedJob.bot_type == job_site).delete(synchronize_session=False)
                db.commit()
                CommonFuncs.log(self, 'successfully deleted all unprocessed jobs for account: ' + job_site)
            self.job_site_account_select()  # refresh job site account section of gui
            self.ui.deleteload_lbl.hide()
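
# Sketch (assumed, not from the source): the get_bot helper called above.
# The snippets treat a missing account as falsy (Example #2) or as an
# exception source (Example #1). A hedged sketch assuming it returns the
# matching JobSiteAccount row or None; get_db and JobSiteAccount are
# sketched alongside the other examples.
def get_bot(site_bot_name):
    with get_db() as db:
        return db.query(JobSiteAccount).filter(
            JobSiteAccount.site_bot_name == site_bot_name).one_or_none()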
Example #4
    def pause_selected_bot_thread(self):
        '''send a pause signal to the bot selected from the dropdown menu.'''
        job_site_bot_name = self.ui.jobsite_select.currentText() + '_Bot'
        CommonFuncs.log(self, 'attempting to send pause signal to bot: %s' % job_site_bot_name)
        with CommonFuncs.get_db() as db:
            try:
                bot = CommonFuncs.get_bot(job_site_bot_name)
                if bot.is_running:
                    self.ui.pauseload_lbl.show()    # only show the loading gif if there is a bot to pause
                    bot.is_running = False
                    db.add(bot)
                    db.commit()
                CommonFuncs.log(self, 'pause signal for %s successfully sent' % job_site_bot_name)
            except Exception:
                CommonFuncs.log(self, 'problem sending pause signal for bot: %s' % job_site_bot_name, level='debug')
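
# Sketch (assumed, not from the source): the JobSiteAccount model implied
# above. The fields site_bot_name, username, password and is_running are all
# referenced by the snippets; the column types, table name and declarative
# Base are assumptions.
from sqlalchemy import Boolean, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class JobSiteAccount(Base):
    __tablename__ = 'job_site_accounts'  # assumed table name
    id = Column(Integer, primary_key=True)
    site_bot_name = Column(String, unique=True)   # e.g. 'Ziprecruiter_Bot'
    username = Column(String, nullable=True)      # None until creds are stored
    password = Column(String, nullable=True)
    is_running = Column(Boolean, default=False)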
Example #5
    def verify_job_site_account_thread_finished(self):

        CommonFuncs.log(self, 'completed verification process of account creds')

        self.ui.jobsiteusername_box.setEnabled(True)
        self.ui.jobsitepassword_box.setEnabled(True)
        self.ui.jobsite_select.setEnabled(True)
        self.ui.jobsiteaccountcancel_btn.setEnabled(True)
        self.ui.verifyload_lbl.hide()
        self.ui.jobsiteaccountcancel_btn.setIcon(QIcon(STATIC_FILES['revert']))
        if self.threads['verify_job_site_account'].error:
            self.ui.jobsiteusername_box.setStyleSheet('background-color: rgb(247, 126, 74)')
            self.ui.jobsitepassword_box.setStyleSheet('background-color: rgb(247, 126, 74)')
            self.ui.verify_btn.setIcon(
                QtGui.QIcon(STATIC_FILES['submit']))  # show the site creds need to be verified
            msg = QMessageBox() # show error message
            msg.setIcon(QMessageBox.Critical)
            msg.setText("Job Site Account Verification Failed")
            msg.setInformativeText("Please correct your username and password and try again.")
            msg.setWindowTitle("Job Site Login Failed")
            msg.setStandardButtons(QMessageBox.Ok)
            msg.exec()
        else:
            # COMMIT THE JOB SITE ACCOUNT CREDS
            jobsitestring = str(self.ui.jobsite_select.currentText()) + '_Bot'
            jobsiteaccount = None
            try:
                jobsiteaccount = CommonFuncs.get_bot(jobsitestring)
            except Exception:
                pass
            if not jobsiteaccount:
                jobsiteaccount = JobSiteAccount()

            jobsiteaccount.site_bot_name = jobsitestring
            jobsiteaccount.username = self.ui.jobsiteusername_box.text()
            jobsiteaccount.password = self.ui.jobsitepassword_box.text()

            with CommonFuncs.get_db() as db:
                db.add(jobsiteaccount)
                db.commit()

            CommonFuncs.log(self, 'successfully stored valid account creds')

            self.ui.verify_btn.setIcon(
                QtGui.QIcon(STATIC_FILES['checked']))  # show the site creds have been verified
            self.ui.jobsiteusername_box.setStyleSheet('background-color: rgb(70, 188, 128)')
            self.ui.jobsitepassword_box.setStyleSheet('background-color: rgb(70, 188, 128)')
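
# Sketch (hypothetical, not from the source): a verification worker like the
# one read above. Example #5 inspects
# self.threads['verify_job_site_account'].error after the thread finishes;
# only the error attribute is implied by the snippet, the class name and the
# placeholder login check are assumptions.
from PyQt5.QtCore import QThread

class VerifyJobSiteAccountThread(QThread):  # hypothetical class name
    def __init__(self, username, password, parent=None):
        super().__init__(parent)
        self.username = username
        self.password = password
        self.error = False

    def run(self):
        self.error = False
        try:
            # placeholder: the real worker would attempt a login on the job site
            if not self.username or not self.password:
                raise ValueError('missing credentials')
        except Exception:
            self.error = True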
Example #6
    def __init__(self):

        self.init_process = True    # some processes in functions disabled during initialization

        if os.path.isfile(LOG_FILE_PATH): os.remove(LOG_FILE_PATH)    # delete the log from the last session
        CommonFuncs.log(self, 'Jobbybot session started')

        self.user_settings = None   # store login creds, job profile, etc
        self.threads = THREADS_DICT

        # RESET ALL BOTS TO NOT RUNNING IN THE DB
        for j_site in JOB_SITE_LINKS:
            site_bot_name = j_site + '_Bot'
            with CommonFuncs.get_db() as db:
                try:
                    bot = CommonFuncs.get_bot(site_bot_name)
                    bot.is_running = False
                except Exception:   # no account row yet for this site: create one
                    bot = JobSiteAccount()
                    bot.is_running = False
                    bot.site_bot_name = site_bot_name
                db.add(bot)
                db.commit()
                CommonFuncs.log(self, 'reset %s to not running in db' % site_bot_name)

        # CHECK FOR SETTINGS OBJECT - create if it does not exist
        settings = None
        with CommonFuncs.get_db() as db:
            try:
                settings = db.query(JobbybotSettings).one()
            except Exception:   # no settings row yet
                pass
            if not settings:
                new_settings = JobbybotSettings()
                new_settings.connect_to_gsheets = False
                new_settings.delete_ujobs_on_jprofile_edit = True
                db.add(new_settings)
                db.commit()    # add settings object to database

        # START GUI SETUP
        app = QApplication(sys.argv)
        self.MainWindow = QtWidgets.QMainWindow()
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self.MainWindow)
        QApplication.setStyle(QStyleFactory.create('Fusion'))
        self.MainWindow.setWindowIcon(QIcon(STATIC_FILES['logo']))
        self.MainWindow.setGeometry(0, 60, 778, 629)

        self.initialize_gui()

        CommonFuncs.log(self, 'finished initializing gui')
        CommonFuncs.log(self, 'Launching Jobbybot!')

        self.threads['stats'].start()
        self.threads['database_tables'].start()

        # OPEN AND RUN THE GUI
        self.init_process = False
        self.MainWindow.show()
        self.job_profile_table_edited()  # initial population of the results for the job profile
        sys.exit(app.exec_())
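
# Sketch (assumed, not from the source): the JobbybotSettings model created
# above on first run. Both boolean fields come from the snippet; the table
# name, id column and Base are assumptions.
from sqlalchemy import Boolean, Column, Integer
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class JobbybotSettings(Base):
    __tablename__ = 'jobbybot_settings'  # assumed table name
    id = Column(Integer, primary_key=True)
    connect_to_gsheets = Column(Boolean, default=False)
    delete_ujobs_on_jprofile_edit = Column(Boolean, default=True)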
Example #7
    def run(self):
        self.set_error(False)   # reset the error flag from any previous run

        Bot_Class = eval(self.site_bot_name)    # resolve the bot class from its name; assumes the class is already imported

        site_name = self.site_bot_name.split('_Bot')[0]
        spider_name = '_' + site_name.lower() + '_webcrawler.py'

        cached_username = ''
        cached_password = ''
        logged_in = False

        # APPLY LOOP
        bot = CommonFuncs.get_bot(self.site_bot_name)
        new_links = ['']
        with CommonFuncs.get_driver(visible=WEB_DRIVERS_VISIBLE, headless=WEB_DRIVERS_HEADLESS) as driver:
            bot_inst = Bot_Class(driver)
            while bot.is_running and len(new_links) > 0:
                if cached_username != bot.username or cached_password != bot.password:  # if the username or password changed, attempt new login
                    cached_username = bot.username
                    cached_password = bot.password
                    logged_in = bot_inst.login(bot)
                if logged_in:  # if logged in and bot is running, apply to a job
                    with CommonFuncs.get_db() as db:
                        try:
                            new_to_db = False
                            while not new_to_db:    # pop queued links until one is found that is new to the db
                                unprocessed_job = db.query(UnprocessedJob).filter(
                                    UnprocessedJob.bot_type == self.site_bot_name).all()
                                new_link = unprocessed_job[0].job
                                db.delete(unprocessed_job[0])
                                db.commit()
                                db_results = db.query(Job).filter(Job.link_to_job == new_link).all()
                                if not db_results: new_to_db = True
                        except Exception:   # the unprocessed queue is empty
                            new_link = None
                    if new_link is not None:
                        CommonFuncs.log(self, 'attempting to apply to: ' + new_link)
                        new_job = bot_inst.apply(new_link)  # goto page and apply
                        if isinstance(new_job, Job):    # only add the job to the database if apply() returned a Job instance
                            with CommonFuncs.get_db() as db:    # save job object to db
                                try:
                                    db.add(new_job)
                                    db.commit()
                                except Exception as e:
                                    print(e)
                    else:
                        CommonFuncs.log(self, 'applier taking a timeout as it waits for more job links')
                        Jobbybot.run_bot_job_link_webcrawler(spider_name=spider_name)   # start the webcrawler for this bot
                        sleep_count = 5
                        for i in range(sleep_count):    # wait for more results, check to make sure the bot is still running
                            if CommonFuncs.is_bot_running(self.site_bot_name):
                                sleep(1)
                            else:
                                break
                bot = CommonFuncs.get_bot(self.site_bot_name)
                sleep(0.1)

        # QThread emits finished automatically when run() returns
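
# Sketch (assumed, not from the source): the is_bot_running helper the apply
# loop polls between sleeps. A one-line sketch assuming it simply re-reads
# the is_running flag through get_bot; the body is an assumption.
def is_bot_running(site_bot_name):
    bot = get_bot(site_bot_name)
    return bool(bot and bot.is_running)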
Example #8
    def start_requests(self):
        '''yield a scrapy.Request for each start URL built from the job profile'''

        with CommonFuncs.get_db() as db:
            todoforsite = db.query(UnprocessedJob).filter(
                UnprocessedJob.bot_type == 'Ziprecruiter_Bot').all()
        if len(todoforsite) >= 100:
            return  # enough links queued already; let the applier catch up

        start_time = datetime.now()

        job_profile = CommonFuncs.get_job_profile()
        locations = CommonFuncs.get_locations_list(job_profile)
        query_list = CommonFuncs.build_query_string(job_profile=job_profile,
                                                    or_delim='',
                                                    bracket1='',
                                                    bracket2='',
                                                    adv_supp=False)

        if len(query_list) == 0: return

        ##########
        # URL ENCODE EACH QUERY
        ##########
        start_urls = []
        for location in locations:
            for query_string in query_list:
                bot = CommonFuncs.get_bot('Ziprecruiter_Bot')
                if bot.is_running:  # verify that the bot is running before continuing to the next page
                    query_dict = {'search': query_string, 'location': location}
                    encoded_query = urllib.parse.urlencode(query_dict, safe='')
                    job_url = JOB_SITE_LINKS['Ziprecruiter'][
                        'query'] + '&' + encoded_query
                    start_urls.append(job_url)
                    response = html.fromstring(requests.get(job_url).content)
                    temp = response.xpath(
                        "//menu[@class='select-menu-submenu t_filter_dropdown_titles']/a/@href"
                    )
                    temp = [
                        JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + i
                        for i in temp
                    ]
                    start_urls += temp  # append all of the links from filtering by job title
                    temp = response.xpath(
                        "//menu[@class='select-menu-submenu t_filter_dropdown_companies']/a/@href"
                    )
                    temp = [
                        JOB_SITE_LINKS['Ziprecruiter']['job_site_base'] + i
                        for i in temp
                    ]
                    start_urls += temp  # append all of the links from filtering by company
                else:
                    return

        msg = 'time spent building start_urls for Ziprecruiter: ' + str(
            datetime.now() - start_time)
        # CommonFuncs.log( msg )
        print(msg)

        ##########
        # GET URL RESPONSES AND CALL PARSE FUNCTION TO ITERATE OVER PAGES
        ##########
        print('TOTAL START URLs: ' + str(len(start_urls)))
        for i, url in enumerate(start_urls, start=1):
            print('LINK#: ' + str(i) + ' WORKING ON NEW START URL: ' + url)
            yield scrapy.Request(url=url, callback=self.parse)
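
# Demo (placeholder values, not from the source): the URL-encoding step from
# start_requests in isolation. urlencode() with safe='' percent-encodes
# reserved characters and joins the pairs with '&'; the base query URL below
# stands in for the real JOB_SITE_LINKS['Ziprecruiter']['query'] value.
import urllib.parse

query_dict = {'search': 'python developer', 'location': 'Boston, MA'}
encoded_query = urllib.parse.urlencode(query_dict, safe='')
job_url = 'https://www.ziprecruiter.com/candidate/search?days=5' + '&' + encoded_query
print(job_url)
# https://www.ziprecruiter.com/candidate/search?days=5&search=python+developer&location=Boston%2C+MA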
Example #9
    def parse(self, response):

        with CommonFuncs.get_db() as db:
            todoforsite = db.query(UnprocessedJob).filter(
                UnprocessedJob.bot_type == 'Ziprecruiter_Bot').all()
        if len(todoforsite) >= 100:
            return  # enough links queued already; let the applier catch up

        # EXTRACT JOB LINKS ON THE PAGE AND COMMIT TO DB
        this_url = response.url
        try:
            searching_by = dict(parse_qsl(urlsplit(this_url).query))
            print('searching by: ' + str(searching_by))
        except Exception:
            pass
        # CommonFuncs.log('starting parsing job page for ZiprecruiterWebcrawler: ' + response.url)

        new_jobs = None
        try:    # only jobs with a quick_apply button
            new_jobs = response.xpath(
                "//div[@class='job_results']/article/div[@class='job_tools']/"
                + "button[@data-tracking='quick_apply']" +
                "/ancestor::article" +
                "/div[@class='job_content']/a/@href").extract()
        except Exception:
            # CommonFuncs.log('could not find jobs on the page: ' + this_url)
            pass
        new_count = 0
        if new_jobs is not None:  # skip the commit loop if no results were found
            for job_link in new_jobs:  # dump the job links to the db
                with CommonFuncs.get_db() as db:
                    db_results = db.query(Job).filter(
                        Job.link_to_job == job_link).all()
                if not db_results:
                    try:
                        with CommonFuncs.get_db() as db:
                            u_job = UnprocessedJob()
                            u_job.bot_type = 'Ziprecruiter_Bot'
                            u_job.job = job_link
                            db.add(u_job)
                            db.commit()
                            new_count += 1
                    except Exception:
                        # CommonFuncs.log('something went wrong in ZiprecruiterWebcrawler trying to commit job link: %s' % job_link, level='debug')
                        pass

        # CommonFuncs.log('%s new jobs found on page %s' % (new_count, response._url) )
        if new_count > 0: print('%s new jobs found on page' % new_count)

        ##########
        # JUMP TO NEXT PAGE WHILE THE BOT IS STILL RUNNING
        ##########

        data_next_url = ''
        try:
            data_next_url = response.xpath("//div[@class='job_results']")
            data_next_url = data_next_url[0].root.attrib['data-next-url']
            if len(data_next_url) > 0:
                url = JOB_SITE_LINKS['Ziprecruiter'][
                    'job_site_base'] + data_next_url
                bot = CommonFuncs.get_bot('Ziprecruiter_Bot')
                # CommonFuncs.log('finished parsing job page for ZiprecruiterWebcrawler: ' + this_url)
                if bot.is_running:  # verify that the bot is running before continuing to the next page
                    yield scrapy.Request(url=url, callback=self.parse)
                else:
                    return
        except Exception:   # no data-next-url attribute: last page reached
            pass


# if __name__ == '__main__':
#     runner = CrawlerRunner()
#     runner.crawl(ZiprecruiterLoginWebcrawler(username='******', password='******'))
#     d = runner.join()
#     d.addBoth(lambda _: reactor.stop())
#     reactor.run()
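
# Demo (placeholder URL, not from the source): what the searching_by lines in
# both parse() methods compute. urlsplit() isolates the query string and
# parse_qsl() turns it into key/value pairs for the debug print.
from urllib.parse import parse_qsl, urlsplit

url = 'https://www.ziprecruiter.com/candidate/search?search=python&location=Boston'
searching_by = dict(parse_qsl(urlsplit(url).query))
print(searching_by)  # {'search': 'python', 'location': 'Boston'}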
    def parse(self, response):

        with CommonFuncs.get_db() as db:
            todoforsite = db.query(UnprocessedJob).filter(UnprocessedJob.bot_type == 'Indeed_Bot').all()
        if len(todoforsite) >= 100:
            return  # enough links queued already; let the applier catch up

        this_url = response.url
        try:
            searching_by = dict(parse_qsl(urlsplit(this_url).query))
            print('searching by: ' + str(searching_by))
        except Exception:
            pass
        # CommonFuncs.log('starting parsing job page for IndeedWebcrawler: ' + response.url)

        # COLLECT NEW JOB LINKS FROM SITE
        jobs = response.xpath("//div[@data-tn-component='organicJob']")
        new_count = 0
        for job in jobs:
            bot = CommonFuncs.get_bot('Indeed_Bot')
            if not bot.is_running: return    # exit if the bot is not running
            extracted_job = job.extract()
            if 'Easily apply' in extracted_job:
                job_link = JOB_SITE_LINKS['Indeed']['job_site_base'] + job.xpath('h2/a/@href').extract()[0]
                with CommonFuncs.get_db() as db:
                    db_results = db.query(Job).filter(Job.link_to_job == job_link).all()
                if not db_results:
                    new_count += 1
                    try:
                        with CommonFuncs.get_db() as db:
                            u_job = UnprocessedJob()
                            u_job.bot_type = 'Indeed_Bot'
                            u_job.job = job_link
                            db.add(u_job)
                            db.commit()
                    except Exception:
                        pass

        # CommonFuncs.log('%s new jobs found on page %s' % (new_count, response.url))
        if new_count > 0: print('%s new jobs found on page' % new_count)

        ##########
        # JUMP TO NEXT PAGE WHILE THE BOT IS STILL RUNNING
        ##########
        pagination_links = response.xpath("//div[@class='pagination']/a").extract()
        for link in pagination_links:
            if 'Next' in link:
                bot = CommonFuncs.get_bot('Indeed_Bot')
                if bot.is_running:  # verify that the bot is running before continuing to the next page
                    # CommonFuncs.log('finished parsing job page for IndeedWebcrawler: ' + this_url)
                    next_link = bs(link, 'lxml').body.find('a').get('href')
                    full_link = JOB_SITE_LINKS['Indeed']['job_site_base'] + next_link
                    yield scrapy.Request(url=full_link, callback=self.parse)
                else:
                    return


# if __name__ == '__main__':
#     runner = CrawlerRunner()
#     runner.crawl(IndeedWebcrawler)
#     d = runner.join()
#     d.addBoth(lambda _: reactor.stop())
#     reactor.run()
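
# Sketch (assumed, not from the source): the two ORM models the crawlers and
# the GUI query. Job.link_to_job, Job.job_site, Job.applied and
# UnprocessedJob.bot_type / UnprocessedJob.job are all referenced by the
# snippets; the column types, table names and Base are assumptions.
from sqlalchemy import Boolean, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class Job(Base):
    __tablename__ = 'jobs'  # assumed table name
    id = Column(Integer, primary_key=True)
    link_to_job = Column(String, unique=True)
    job_site = Column(String)
    applied = Column(Boolean, default=False)

class UnprocessedJob(Base):
    __tablename__ = 'unprocessed_jobs'  # assumed table name
    id = Column(Integer, primary_key=True)
    bot_type = Column(String)   # e.g. 'Indeed_Bot' or 'Ziprecruiter_Bot'
    job = Column(String)        # link to a job posting awaiting an apply attempt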