@classmethod
def _loadToken(cls):
    local_storage = LocalStorage(cls._TOKEN_PATH)
    local_storage.setFileName(cls._TOKEN_FILENAME)
    if not local_storage.existFile():
        return False
    token_file = cls._TOKEN_PATH + "/" + cls._TOKEN_FILENAME
    with open(token_file, 'rb') as token:
        cls._TOKEN = pickle.load(token)
    return cls
def __init__(self):
    self.keyword_folder = os.path.join('Storage', 'config', 'keywords')
    self.data_path = os.path.join(self.keyword_folder, 'keyword_data.json')
    self.totals_path = os.path.join(self.keyword_folder, 'totals.json')
    self.keyword_names = LocalStorage.get_keyword_names()
    self.totals = self.load_totals()
    self.keywords = self.load_keywords()
def run():
    LocalStorage().clear_database()
    manager = EngineerJobsManager()
    storage = manager.storage
    jobs = manager.get_jobs()
    storage.store_jobs(jobs)
    retrieved_jobs = storage.get_jobs_from_database()
    for job in retrieved_jobs:
        print(job.get_title())
    print('Total number of jobs: ' + str(len(jobs)))
def __init__(self):
    self.header = ''
    self.resume_path = LocalStorage.get_config_file_path('resume/Resume.docx')
    self.text = self.extract_text(self.resume_path)
    self.section_whitelist = self.load_section_whitelist()
    self.section_names = self.get_section_names(self.section_whitelist)
    self.sections = self.get_sections()
    self.resume_config = self.get_resume_config()
    self.store_resume_config()
class Loader:
    _CONFIG_PATH = "config"
    _CONFIG_FILENAME = "config.json"
    _LOCAL_STORAGE = LocalStorage(_CONFIG_PATH)
    _JSON_CONFIG = None

    def __init__(self):
        self.loadConfiguration()

    @classmethod
    def setFileName(cls, fileName):
        if len(fileName) <= 0:
            raise ValueError("The file name cannot be empty")
        cls._CONFIG_FILENAME = fileName
        return cls

    @classmethod
    def loadConfiguration(cls):
        cls._LOCAL_STORAGE.setFileName(cls._CONFIG_FILENAME)
        if not cls._LOCAL_STORAGE.existFile():
            raise NameError("The configuration file was not found")
        json_content = cls._LOCAL_STORAGE.getFileContent()
        cls._JSON_CONFIG = json.loads(json_content)
        return cls

    @classmethod
    def getConfigurations(cls):
        return cls._JSON_CONFIG

    @classmethod
    def getConfig(cls, configName):
        if len(configName) <= 0:
            return None
        try:
            return cls._JSON_CONFIG[configName]
        except (KeyError, TypeError):
            # missing key, or configuration not loaded yet
            return None

    @staticmethod
    def get_config(configName):
        if len(configName) <= 0:
            return None
        config_loader = Loader()
        return config_loader.getConfig(configName)
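A minimal usage sketch for Loader follows. It assumes config/config.json exists relative to the working directory and contains a top-level key; the 'database' key below is only an illustrative placeholder, not a setting the project is known to define.

# Illustrative usage; 'database' is a placeholder key, not a documented setting.
database_config = Loader.get_config('database')
if database_config is None:
    print('No "database" entry found in config.json')
else:
    print('Loaded configuration:', database_config)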
@classmethod
def _loadCredentials(cls):
    local_storage = LocalStorage(cls._CREDENTIALS_PATH)
    local_storage.setFileName(cls._CREDENTIALS_FILENAME)
    if not local_storage.existFile():
        raise NameError("The credentials file was not found")
    file_content = local_storage.getFileContent()
    cls._CREDENTIALS = loads(file_content)
    return cls
@classmethod
def _savePhisicalFiles(cls):
    if len(cls._FILES_FOUND) <= 0:
        return None
    for item in cls._FILES_FOUND:
        try:
            file = LocalStorage(cls._FOLDER_FORFILES)
            file.setFileName(item['fileName'])
            content = base64.urlsafe_b64decode(item['data'].encode('utf-8'))
            file.saveFileContent(content=content, mode="wb")
            print(f"File saved: {item['fileName']}")
        except Exception:
            print(f"Failed to save the file {item['fileName']}")
    return cls
def store_resume_config(self):
    LocalStorage.store_json_config('resume/resume_config.json', self.resume_config)
@staticmethod
def load_section_whitelist():
    section_names = LocalStorage.get_config_file_text('resume/resume_sections.txt')
    return section_names.split('\n')
class GUI:
    def __init__(self):
        self.crawler = EngineerJobsCrawler()
        self.lister = EngineerJobsLister()
        self.stats = Stats()
        self.storage = LocalStorage()
        self.job_manager = JobManager(self.crawler, self.lister)
        self.setup_completed = False
        self.app = gui("JobFinder", "700x300")
        self.configure_app(self.app)
        self.app.go()

    def train_btn(self, value):
        app = self.app
        num_jobs = self.spider.num_jobs
        if value == 'dummy':
            progress = 1
            job_ratio = '1/' + str(num_jobs)
            job = self.spider.jobs[0]
        else:
            self.job_num = self.job_num + 1
            current_job = self.spider.jobs[self.job_num - 1]
            if value == 'Accept':
                current_job.passed = True
            else:
                current_job.passed = False
            if self.job_num >= self.spider.num_jobs:
                app.hideSubWindow("Train")
                self.storage.store_jobs(self.job_manager.jobs)
                return
            else:
                job_ratio = str(self.job_num + 1) + '/' + str(num_jobs)
                progress = (self.job_num / num_jobs) * 100
                job = self.spider.jobs[self.job_num]
        title_text = job.title + '\n' + job.company + '\n' + job.location
        app.openSubWindow("Train")
        app.updateListBox('Keywords', job.keywords)
        app.setLabel('job_label', title_text)
        app.setLabelAnchor('job_label', 'center')
        app.setMeter("progress_bar", progress, text=job_ratio + " Jobs")
        app.stopSubWindow()

    def load(self):
        # poll crawl progress until the description extraction reaches 100%
        extract_percent = 0
        while extract_percent < 100:
            pages_percent = self.spider.percents[0]
            jobs_percent = self.spider.percents[1]
            extract_percent = self.spider.percents[2]
            jobs_percent_text = str(round(jobs_percent, 1)) + '% ' + 'jobs loaded.'
            pages_percent_text = str(round(pages_percent, 1)) + '% ' + 'pages loaded.'
            extract_percent_text = str(round(extract_percent, 1)) + '% ' + 'data extracted.'
            self.app.queueFunction(self.app.setMeter, "pages_bar", pages_percent, text=pages_percent_text)
            self.app.queueFunction(self.app.setMeter, "jobs_bar", jobs_percent, text=jobs_percent_text)
            self.app.queueFunction(self.app.setMeter, "extract_bar", extract_percent, text=extract_percent_text)
            sleep(1)
        self.app.hideSubWindow("LoadScreen")

    def press(self, win):
        app = self.app
        if win == 'Setup':
            app.showSubWindow("LoadScreen")
            self.crawler = EngineerJobsCrawler()
            self.lister = EngineerJobsLister()
            self.job_manager = JobManager(self.crawler, self.lister)
            # run the progress updater and the crawl in background threads
            app.thread(self.load)
            app.thread(self.job_manager.get_jobs)
            self.setup_completed = True
        elif win == 'Train':
            if self.setup_completed:
                if self.job_num >= self.num_jobs:
                    self.num_jobs = self.spider.num_jobs
                    self.storage.clear_directory(self.train_pass)
                    self.storage.clear_directory(self.train_fail)
                    self.train_btn('dummy')
                    self.job_num = 0
                app.showSubWindow("Train")
            else:
                app.errorBox("Error.", "Setup not completed.")
        else:
            if self.setup_completed:
                training_data = os.listdir(self.train_pass) or os.listdir(self.train_fail)
            else:
                app.errorBox('Error.', 'Setup not completed.')
                return
            if training_data:
                self.storage.clear_directory(self.classify_pass)
                self.storage.clear_directory(self.classify_fail)
                self.jobs = self.storage.get_jobs_from_cache()
                self.stats.clear_training_data()
                self.stats.train(self.jobs)
                for job in self.spider.jobs:
                    job.passed = self.stats.classify(job)
                self.storage.store_jobs(self.job_manager.jobs)
            else:
                app.errorBox("Error.", "No training data found.")

    def about(self):
        message = 'This program finds job postings of interest based on training using keywords.'
self.app.infoBox("About Job Finder", message, parent=None) def settings(self): self.app.showSubWindow('Settings') def configure_app(self, app): app.setFont(size=12, family="Arial") app.setBg("grey") app.setSticky("nsew") app.setPadding([20, 60]) tools = ["ABOUT", "SETTINGS"] app.addToolbar(tools, [self.about, self.settings], findIcon=True) app.addButton("Setup", self.press, 1, 0) app.addButton("Train", self.press, 1, 1) app.addButton("Classify", self.press, 1, 2) app.startSubWindow("Train", modal=True) app.setSize("800x400") app.setSticky('ew') app.addLabel("job_label", "Job Title", 0, 0, 3) app.setSticky('') app.addListBox('Keywords', [], 1, 1, 1, 1) app.setListBoxWidth('Keywords', 40) app.setSticky('e') app.addButton('Accept', self.train_btn, 1, 0) app.setSticky('w') app.addButton('Reject', self.train_btn, 1, 2) app.setSticky('ew') app.addMeter('progress_bar', 2, 0, 3) app.setMeterFill("progress_bar", "grey") app.setLabelWidth('job_label', 20) app.stopSubWindow() app.startSubWindow("Settings", modal=True) app.setSize("400x300") app.addLabel('time_label', 'Select how many days to use for training.') app.addRadioButton("frequency", "1") app.addRadioButton("frequency", "3") app.addRadioButton("frequency", "7") app.addRadioButton("frequency", "14") app.addRadioButton("frequency", "30") app.stopSubWindow() app.startSubWindow("LoadScreen", modal=True) app.addLabel('load_label', 'Loading data, please wait...') app.setSize("300x200") app.setSticky('ew') app.addMeter('pages_bar') app.setMeterFill("pages_bar", "grey") app.addMeter('jobs_bar') app.setMeterFill("jobs_bar", "grey") app.addMeter('extract_bar') app.setMeterFill("extract_bar", "grey") app.stopSubWindow()
def __init__(self):
    self.keyword_names = LocalStorage.get_keyword_names()
def read_resume_config():
    resume_config = LocalStorage.read_json_config('resume/resume_config.json')
    return resume_config
def save_data(self):
    keyword_dict = self.encode_keywords_to_dict()
    LocalStorage.store_json_data(self.data_path, keyword_dict)
    LocalStorage.store_json_data(self.totals_path, self.totals)
def load_totals(self):
    totals = LocalStorage.get_json_data(self.totals_path)
    return totals
def load_keywords(self):
    keyword_dict = LocalStorage.get_json_data(self.data_path)
    keywords = self.encode_dict_to_keywords(keyword_dict)
    return keywords
def load_resume_config():
    return LocalStorage.read_json_config('resume/resume_config.json')
class JobManager:
    def __init__(self, crawler='', lister=''):
        self.jobs = []
        self.failed_jobs = []
        self.storage = LocalStorage()
        self.last_id = self.get_last_job_id()
        self.crawler = Loader.load('Crawlers', crawler) if crawler else None
        self.lister = Loader.load('Listers', lister, self.last_id) if lister else None
        self.parse_utility = ParseUtility.ParseUtility()

    def get_jobs(self):
        listings = self.get_job_listings()
        postings = self.get_job_postings(listings)
        logging.debug("Job postings with descriptions: " + str(len(postings)))
        jobs = self.get_jobs_from_postings(postings)
        jobs_with_keywords = self.update_jobs_with_keywords(jobs)
        self.jobs = jobs_with_keywords
        return self.jobs

    def get_job_listings(self):
        page_listings_queue = self.crawler.crawl_job_listings()
        page_listings = QueueUnpacker.unpack(page_listings_queue)
        self.lister.add_pages(page_listings)
        listings = self.lister.get_listings()
        return listings

    def update_jobs_with_keywords(self, jobs):
        self.failed_jobs = self.parse_utility.pop_empty(jobs)
        logging.debug('Failed jobs: ' + str(self.failed_jobs))
        jobs_with_keywords = self.parse_utility.update_keywords(jobs)
        return jobs_with_keywords

    def get_job_postings(self, listings):
        posting_queue = self.crawler.crawl_job_postings(listings)
        postings = QueueUnpacker.unpack(posting_queue)
        return postings

    def get_jobs_from_postings(self, postings):
        jobs = []
        for posting in postings:
            job = Job(posting)
            jobs.append(job)
        logging.debug('Created ' + str(len(jobs)) + ' jobs from postings.')
        return self.update_job_descriptions(jobs)

    def update_job_descriptions(self, jobs):
        jobs_queue = self.crawler.crawl_job_descriptions(jobs)
        jobs = QueueUnpacker.unpack(jobs_queue)
        return jobs

    def set_jobs(self, jobs):
        self.jobs = jobs

    def get_num_jobs(self):
        return len(self.jobs)

    def clear_jobs(self):
        self.jobs = []

    def store(self):
        self.storage.store_jobs(self.jobs)

    def get_last_job_id(self):
        last_id = self.storage.get_free_job_id() - 1
        return last_id
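A hedged sketch of driving the JobManager pipeline end to end. The 'EngineerJobs' crawler and lister names and the logging setup are assumptions for illustration; use whatever module names Loader.load actually resolves under Crawlers/ and Listers/.

import logging

logging.basicConfig(level=logging.DEBUG)

# 'EngineerJobs' is an assumed name for the crawler/lister modules Loader.load resolves.
manager = JobManager(crawler='EngineerJobs', lister='EngineerJobs')
jobs = manager.get_jobs()  # crawl listings, postings, and descriptions, then extract keywords
manager.store()            # persist the parsed jobs through LocalStorage
print('Stored ' + str(manager.get_num_jobs()) + ' jobs; '
      + str(len(manager.failed_jobs)) + ' postings had no description.')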