示例#1
0
 def __init__(self, crawler='', lister=''):
     """Set up job bookkeeping, storage, and the optional crawler/lister.

     Args:
         crawler: Value handed to ``Loader.load('Crawlers', ...)``. A falsy
             value leaves ``self.crawler`` as ``None``.
         lister: Value handed to ``Loader.load('Listers', ...)`` together
             with ``self.last_id``. A falsy value leaves ``self.lister``
             as ``None``.
     """
     self.jobs = []
     self.failed_jobs = []
     self.storage = LocalStorage()
     # Kept after the storage assignment, exactly as in the original order.
     self.last_id = self.get_last_job_id()

     if crawler:
         self.crawler = Loader.load('Crawlers', crawler)
     else:
         self.crawler = None

     if lister:
         self.lister = Loader.load('Listers', lister, self.last_id)
     else:
         self.lister = None

     self.parse_utility = ParseUtility.ParseUtility()
示例#2
0
 def __init__(self):
     """Create the collaborators, configure the "JobFinder" window, and call ``go()`` on it."""
     crawler = EngineerJobsCrawler()
     lister = EngineerJobsLister()
     self.crawler = crawler
     self.lister = lister
     self.stats = Stats()
     self.storage = LocalStorage()
     # The job manager receives the same crawler/lister objects stored above.
     self.job_manager = JobManager(crawler, lister)
     self.setup_completed = False

     window = gui("JobFinder", "700x300")
     self.app = window
     self.configure_app(window)
     window.go()
    def _loadToken(cls):
        """Unpickle the token file into ``cls._TOKEN``.

        Returns:
            ``False`` when the token file does not exist, otherwise ``cls``.
        """
        storage = LocalStorage(cls._TOKEN_PATH)
        storage.setFileName(cls._TOKEN_FILENAME)

        if storage.existFile():
            token_location = cls._TOKEN_PATH + "/" + cls._TOKEN_FILENAME
            # NOTE(review): pickle.load can execute arbitrary code from a
            # crafted file; this is only safe if the token file is trusted.
            with open(token_location, 'rb') as handle:
                cls._TOKEN = pickle.load(handle)
            return cls

        return False
示例#4
0
 def __init__(self):
     """Resolve the keyword storage paths, then load names, totals, and keywords."""
     folder = os.path.join('Storage', 'config', 'keywords')
     self.keyword_folder = folder
     self.data_path = os.path.join(folder, 'keyword_data.json')
     self.totals_path = os.path.join(folder, 'totals.json')

     # Load order is preserved: names first, then totals, then keywords.
     self.keyword_names = LocalStorage.get_keyword_names()
     self.totals = self.load_totals()
     self.keywords = self.load_keywords()
示例#5
0
def run():
    """Clear the database, fetch jobs, store them, and print the stored titles.

    The final count printed is the number of jobs fetched from the manager,
    not the number read back from the database.
    """
    LocalStorage().clear_database()

    manager = EngineerJobsManager()
    store = manager.storage
    fetched = manager.get_jobs()
    store.store_jobs(fetched)

    for stored_job in store.get_jobs_from_database():
        print(stored_job.get_title())

    print(f'Total number of jobs: {len(fetched)}')
示例#6
0
 def __init__(self):
     """Read the resume file, derive its sections and config, then store the config."""
     self.header = ''

     resume_file = LocalStorage.get_config_file_path('resume/Resume.docx')
     self.resume_path = resume_file
     self.text = self.extract_text(resume_file)

     whitelist = self.load_section_whitelist()
     self.section_whitelist = whitelist
     self.section_names = self.get_section_names(whitelist)

     # The remaining steps run in the original order on the state set above.
     self.sections = self.get_sections()
     self.resume_config = self.get_resume_config()
     self.store_resume_config()
class Loader:
    """Load a JSON configuration file through ``LocalStorage`` and expose its entries.

    All state lives on the class: the file name, the shared ``LocalStorage``
    handle (created once, when the class body is executed), and the parsed
    configuration.
    """

    _CONFIG_PATH = "config"
    _CONFIG_FILENAME = "config.json"
    _LOCAL_STORAGE = LocalStorage(_CONFIG_PATH)
    # Parsed configuration; stays None until loadConfiguration() succeeds.
    _JSON_CONFIG = None

    def __init__(self):
        """Load the configuration file as soon as an instance is created."""
        self.loadConfiguration()

    @classmethod
    def setFileName(cls, fileName):
        """Set the configuration file name used by later loads.

        Args:
            fileName: Non-empty file name.

        Returns:
            The class itself, so calls can be chained.

        Raises:
            ValueError: If ``fileName`` is empty. The previous code returned
                the ``ValueError`` class instead of raising it, which hid
                the error from callers.
        """
        if not fileName:
            raise ValueError("The configuration file name must not be empty")

        cls._CONFIG_FILENAME = fileName

        return cls

    @classmethod
    def loadConfiguration(cls):
        """Read the configuration file and parse it as JSON into the class state.

        Returns:
            The class itself, so calls can be chained.

        Raises:
            NameError: If the storage reports that the file does not exist.
        """
        cls._LOCAL_STORAGE.setFileName(cls._CONFIG_FILENAME)

        if not cls._LOCAL_STORAGE.existFile():
            raise NameError("The configuration file was not found")

        json_content = cls._LOCAL_STORAGE.getFileContent()
        cls._JSON_CONFIG = json.loads(json_content)

        return cls

    @classmethod
    def getConfigurations(cls):
        """Return the whole parsed configuration (``None`` if nothing was loaded)."""
        return cls._JSON_CONFIG

    @classmethod
    def getConfig(cls, configName):
        """Return one configuration entry, or ``None`` when it is unavailable.

        Args:
            configName: Key to look up in the parsed configuration.

        Returns:
            The stored value, or ``None`` when ``configName`` is empty, the
            configuration has not been loaded, or the key is missing.
        """
        if not configName:
            return None

        try:
            return cls._JSON_CONFIG[configName]
        except (KeyError, TypeError, ValueError):
            # KeyError: the key is absent (previously escaped to the caller).
            # TypeError: the configuration is still None or not subscriptable
            # by this key.
            return None

    @staticmethod
    def get_config(configName):
        """Reload the configuration file and return the entry named ``configName``.

        A new ``Loader`` is created on each call, so the file is re-read
        every time.

        Returns:
            The stored value, or ``None`` when ``configName`` is empty or
            the entry is unavailable.
        """
        if not configName:
            return None

        config_loader = Loader()
        return config_loader.getConfig(configName)
    def _loadCredentials(cls):
        """Read the credentials file and store its parsed content in ``cls._CREDENTIALS``.

        Returns:
            ``cls``, so calls can be chained.

        Raises:
            NameError: If the storage reports that the credentials file
                does not exist.
        """
        storage = LocalStorage(cls._CREDENTIALS_PATH)
        storage.setFileName(cls._CREDENTIALS_FILENAME)

        if storage.existFile():
            cls._CREDENTIALS = loads(storage.getFileContent())
            return cls

        raise NameError("missing file for credentials")
    def _savePhisicalFiles(cls):

        if len(cls._FILES_FOUND) <= 0:
            return None

        index = 0
        for item in cls._FILES_FOUND:

            try:
                file = LocalStorage(cls._FOLDER_FORFILES)
                file.setFileName(item['fileName'])
                content = base64.urlsafe_b64decode(item['data'].encode('utf-8'))
                file.saveFileContent(content=content, mode="wb")
                print(f"File Saved {item['fileName']}")

            except:
                print(f"Fail to save the file {item['fileName']}")

        return cls
示例#10
0
 def store_resume_config(self):
     """Pass ``self.resume_config`` to ``LocalStorage.store_json_config`` for the resume config file."""
     config_file = 'resume/resume_config.json'
     LocalStorage.store_json_config(config_file, self.resume_config)
示例#11
0
 def load_section_whitelist():
     """Return the ``resume/resume_sections.txt`` config text split on newlines."""
     return LocalStorage.get_config_file_text(
         'resume/resume_sections.txt').split('\n')
示例#12
0
class GUI:
    """Desktop front end wiring the crawler, lister, stats and storage to a window.

    The window API used here (sub-windows, meters, ``queueFunction``,
    ``thread``) looks like appJar's ``gui`` -- confirm against the file's
    imports.

    NOTE(review): several attributes read by the methods below are never
    assigned anywhere in this class: ``self.spider``, ``self.train_pass``,
    ``self.train_fail``, ``self.classify_pass`` and ``self.classify_fail``.
    ``self.job_num`` and ``self.num_jobs`` are read before their first
    assignment. Unless something outside this class sets them,
    ``train_btn``, ``load`` and ``press`` raise ``AttributeError``.
    """

    def __init__(self):
        """Create the collaborators, build the window, and call ``go()`` on it."""
        self.crawler = EngineerJobsCrawler()
        self.lister = EngineerJobsLister()
        self.stats = Stats()
        self.storage = LocalStorage()
        self.job_manager = JobManager(self.crawler, self.lister)
        self.setup_completed = False
        self.app = gui("JobFinder", "700x300")
        self.configure_app(self.app)
        self.app.go()

    def train_btn(self, value):
        """Record the verdict for the current job and display the next one.

        Args:
            value: ``'Accept'`` marks the current job as passed; any other
                value except ``'dummy'`` marks it as not passed.
                ``'dummy'`` only displays the first job without recording
                anything. Presumably the toolkit passes the pressed
                button's title here -- confirm.
        """
        app = self.app
        num_jobs = self.spider.num_jobs
        if value == 'dummy':
            progress = 1
            job_ratio = f'1/{num_jobs}'
            job = self.spider.jobs[0]
        else:
            self.job_num = self.job_num + 1
            current_job = self.spider.jobs[self.job_num - 1]
            current_job.passed = value == 'Accept'

            if self.job_num >= self.spider.num_jobs:
                # Every job has a verdict: close the window and store.
                app.hideSubWindow("Train")
                self.storage.store_jobs(self.job_manager.jobs)
                return

            job_ratio = f'{self.job_num + 1}/{num_jobs}'
            progress = (self.job_num / num_jobs) * 100
            job = self.spider.jobs[self.job_num]

        title_text = job.title + '\n' + job.company + '\n' + job.location
        app.openSubWindow("Train")
        app.updateListBox('Keywords', job.keywords)
        app.setLabel('job_label', title_text)
        app.setLabelAnchor('job_label', 'center')
        app.setMeter("progress_bar", progress, text=job_ratio + " Jobs")
        app.stopSubWindow()

    def load(self):
        """Mirror the crawl progress on the load-screen meters until extraction is done.

        ``press`` starts this method through ``app.thread``. It reads
        ``self.spider.percents`` once a second and stops when the third
        value reaches 100. The previous loop tested a local that was never
        updated, so it could not terminate and the load screen was never
        hidden.
        """
        while True:
            pages_percent = self.spider.percents[0]
            jobs_percent = self.spider.percents[1]
            extract_percent = self.spider.percents[2]
            pages_percent_text = f'{round(pages_percent, 1)}% pages loaded.'
            jobs_percent_text = f'{round(jobs_percent, 1)}% jobs loaded.'
            extract_percent_text = f'{round(extract_percent, 1)}% data extracted.'
            self.app.queueFunction(self.app.setMeter,
                                   "pages_bar",
                                   pages_percent,
                                   text=pages_percent_text)
            self.app.queueFunction(self.app.setMeter,
                                   "jobs_bar",
                                   jobs_percent,
                                   text=jobs_percent_text)
            self.app.queueFunction(self.app.setMeter,
                                   "extract_bar",
                                   extract_percent,
                                   text=extract_percent_text)
            if extract_percent >= 100:
                break
            sleep(1)
        # Queued like the meter updates above, since this runs off the GUI
        # thread.
        self.app.queueFunction(self.app.hideSubWindow, "LoadScreen")

    def press(self, win):
        """Handle the main-window buttons.

        Args:
            win: ``'Setup'`` starts loading jobs, ``'Train'`` opens the
                training window, and any other value runs classification.
        """
        app = self.app
        if win == 'Setup':
            app.showSubWindow("LoadScreen")
            self.crawler = EngineerJobsCrawler()
            self.lister = EngineerJobsLister()
            self.job_manager = JobManager(self.crawler, self.lister)
            app.thread(self.load)
            # Pass the bound method itself. Calling it here would run the
            # whole crawl before the thread is even started.
            app.thread(self.job_manager.get_jobs)
            # NOTE(review): this is set as soon as the threads are started,
            # not when the crawl has actually finished.
            self.setup_completed = True

        elif win == 'Train':
            if self.setup_completed:
                if self.job_num >= self.num_jobs:
                    # Previous round finished: reset and show the first job.
                    self.num_jobs = self.spider.num_jobs
                    self.storage.clear_directory(self.train_pass)
                    self.storage.clear_directory(self.train_fail)
                    self.train_btn('dummy')
                    self.job_num = 0
                app.showSubWindow("Train")
            else:
                app.errorBox("Error.", "Setup not completed.")

        else:
            if not self.setup_completed:
                app.errorBox('Error.', 'Setup not completed.')
                return

            # Training data exists if either training directory has entries.
            training_data = os.listdir(self.train_pass) or os.listdir(
                self.train_fail)

            if training_data:
                self.storage.clear_directory(self.classify_pass)
                self.storage.clear_directory(self.classify_fail)
                self.jobs = self.storage.get_jobs_from_cache()
                self.stats.clear_training_data()
                self.stats.train(self.jobs)
                for job in self.spider.jobs:
                    job.passed = self.stats.classify(job)
                self.storage.store_jobs(self.job_manager.jobs)
            else:
                app.errorBox("Error.", "No training data found.")

    def about(self):
        """Show the "About Job Finder" information box."""
        message = 'This program finds job postings of interest based on training using keywords.'
        self.app.infoBox("About Job Finder", message, parent=None)

    def settings(self):
        """Show the "Settings" sub-window."""
        self.app.showSubWindow('Settings')

    def configure_app(self, app):
        """Build the main window and the Train, Settings and LoadScreen sub-windows.

        Args:
            app: The window object to configure.
        """
        # Main window: toolbar plus the three action buttons.
        app.setFont(size=12, family="Arial")
        app.setBg("grey")
        app.setSticky("nsew")
        app.setPadding([20, 60])
        tools = ["ABOUT", "SETTINGS"]
        app.addToolbar(tools, [self.about, self.settings], findIcon=True)
        app.addButton("Setup", self.press, 1, 0)
        app.addButton("Train", self.press, 1, 1)
        app.addButton("Classify", self.press, 1, 2)

        # Train sub-window: job label, keyword list, Accept/Reject, progress.
        app.startSubWindow("Train", modal=True)
        app.setSize("800x400")
        app.setSticky('ew')
        app.addLabel("job_label", "Job Title", 0, 0, 3)
        app.setSticky('')
        app.addListBox('Keywords', [], 1, 1, 1, 1)
        app.setListBoxWidth('Keywords', 40)
        app.setSticky('e')
        app.addButton('Accept', self.train_btn, 1, 0)
        app.setSticky('w')
        app.addButton('Reject', self.train_btn, 1, 2)
        app.setSticky('ew')
        app.addMeter('progress_bar', 2, 0, 3)
        app.setMeterFill("progress_bar", "grey")
        app.setLabelWidth('job_label', 20)
        app.stopSubWindow()

        # Settings sub-window: radio buttons for the training period in days.
        app.startSubWindow("Settings", modal=True)
        app.setSize("400x300")
        app.addLabel('time_label', 'Select how many days to use for training.')
        app.addRadioButton("frequency", "1")
        app.addRadioButton("frequency", "3")
        app.addRadioButton("frequency", "7")
        app.addRadioButton("frequency", "14")
        app.addRadioButton("frequency", "30")
        app.stopSubWindow()

        # LoadScreen sub-window: the three meters updated by load().
        app.startSubWindow("LoadScreen", modal=True)
        app.addLabel('load_label', 'Loading data, please wait...')
        app.setSize("300x200")
        app.setSticky('ew')
        app.addMeter('pages_bar')
        app.setMeterFill("pages_bar", "grey")
        app.addMeter('jobs_bar')
        app.setMeterFill("jobs_bar", "grey")
        app.addMeter('extract_bar')
        app.setMeterFill("extract_bar", "grey")
        app.stopSubWindow()
示例#13
0
 def __init__(self):
     """Keep the keyword names returned by ``LocalStorage.get_keyword_names()``."""
     names = LocalStorage.get_keyword_names()
     self.keyword_names = names
示例#14
0
 def read_resume_config():
     """Return what ``LocalStorage.read_json_config`` yields for the resume config file."""
     return LocalStorage.read_json_config('resume/resume_config.json')
示例#15
0
 def save_data(self):
     """Store the encoded keywords, then the totals, via ``LocalStorage.store_json_data``."""
     encoded = self.encode_keywords_to_dict()
     store = LocalStorage.store_json_data
     store(self.data_path, encoded)
     store(self.totals_path, self.totals)
示例#16
0
 def load_totals(self):
     """Return the JSON data ``LocalStorage`` reads from ``self.totals_path``."""
     return LocalStorage.get_json_data(self.totals_path)
示例#17
0
 def load_keywords(self):
     """Read the JSON data at ``self.data_path`` and convert it with ``encode_dict_to_keywords``."""
     raw = LocalStorage.get_json_data(self.data_path)
     return self.encode_dict_to_keywords(raw)
示例#18
0
 def load_resume_config():
     """Return what ``LocalStorage.read_json_config`` yields for the resume config file."""
     config_file = 'resume/resume_config.json'
     return LocalStorage.read_json_config(config_file)
示例#19
0
class JobManager:
    """Drive a crawler and a lister to build, hold, and store ``Job`` objects."""

    def __init__(self, crawler='', lister=''):
        """Set up job bookkeeping, storage, and the optional crawler/lister.

        Args:
            crawler: Value handed to ``Loader.load('Crawlers', ...)``. A
                falsy value leaves ``self.crawler`` as ``None``.
            lister: Value handed to ``Loader.load('Listers', ...)`` together
                with ``self.last_id``. A falsy value leaves ``self.lister``
                as ``None``.
        """
        self.jobs = []
        self.failed_jobs = []
        self.storage = LocalStorage()
        # get_last_job_id() reads self.storage, so it must come after it.
        self.last_id = self.get_last_job_id()

        if crawler:
            self.crawler = Loader.load('Crawlers', crawler)
        else:
            self.crawler = None

        if lister:
            self.lister = Loader.load('Listers', lister, self.last_id)
        else:
            self.lister = None

        self.parse_utility = ParseUtility.ParseUtility()

    def get_jobs(self):
        """Run the full pipeline, keep the result in ``self.jobs``, and return it."""
        postings = self.get_job_postings(self.get_job_listings())
        logging.debug(f"Job postings with descriptions: {len(postings)}")
        created = self.get_jobs_from_postings(postings)
        self.jobs = self.update_jobs_with_keywords(created)
        return self.jobs

    def get_job_listings(self):
        """Crawl the listing pages, feed them to the lister, and return its listings."""
        pages = QueueUnpacker.unpack(self.crawler.crawl_job_listings())
        self.lister.add_pages(pages)
        return self.lister.get_listings()

    def update_jobs_with_keywords(self, jobs):
        """Record what ``pop_empty`` returns as failed jobs, then return ``update_keywords(jobs)``."""
        self.failed_jobs = self.parse_utility.pop_empty(jobs)
        logging.debug(f'Failed jobs: {self.failed_jobs}')
        return self.parse_utility.update_keywords(jobs)

    def get_job_postings(self, listings):
        """Crawl the postings for ``listings`` and return them unpacked from the queue."""
        return QueueUnpacker.unpack(self.crawler.crawl_job_postings(listings))

    def get_jobs_from_postings(self, postings):
        """Wrap each posting in a ``Job`` and return the jobs after the description crawl."""
        created = [Job(posting) for posting in postings]
        logging.debug(f'Created {len(created)} jobs from postings.')
        return self.update_job_descriptions(created)

    def update_job_descriptions(self, jobs):
        """Crawl descriptions for ``jobs`` and return the unpacked result."""
        return QueueUnpacker.unpack(self.crawler.crawl_job_descriptions(jobs))

    def set_jobs(self, jobs):
        """Replace the held jobs with ``jobs``."""
        self.jobs = jobs

    def get_num_jobs(self):
        """Return how many jobs are currently held."""
        return len(self.jobs)

    def clear_jobs(self):
        """Drop all held jobs."""
        self.jobs = []

    def store(self):
        """Hand the held jobs to the storage's ``store_jobs``."""
        self.storage.store_jobs(self.jobs)

    def get_last_job_id(self):
        """Return the storage's free job id minus one."""
        return self.storage.get_free_job_id() - 1