def set(self, parameter: JobField, job: Job, soup: BeautifulSoup) -> None:
    """Set a single job attribute from a soup object by JobField

    NOTE: priority is: HIGH: RAW, LOW: DESCRIPTION / TAGS
    WAGE, DESCRIPTION and TAGS are all read from job._raw_scrape_data,
    which is what the RAW step assigns.

    Arguments:
        parameter: the JobField naming the attribute to populate.
        job: the Job that is mutated in place.
        soup: not used by this method; every field is read from
            job._raw_scrape_data instead.

    Raises:
        NotImplementedError: if parameter is not one of RAW, WAGE,
            DESCRIPTION or TAGS.
    """
    if parameter == JobField.RAW:
        # Fetch the job's own page and keep the parsed document on the job
        # so the remaining fields can be read from it.
        job._raw_scrape_data = BeautifulSoup(
            self.session.get(job.url).text, self.config.bs4_parser)
    elif parameter == JobField.WAGE:
        # Wage is optional: job.wage is left untouched when either the
        # outer cell or its inner div is missing.
        pot_wage_cell = job._raw_scrape_data.find(
            'div', attrs={'class': 'col-xs-12 cell'})
        if pot_wage_cell:
            pot_wage_value = pot_wage_cell.find('div')
            if pot_wage_value:
                job.wage = pot_wage_value.text.strip()
    elif parameter == JobField.DESCRIPTION:
        assert job._raw_scrape_data
        job.description = job._raw_scrape_data.find(
            id='JobDescription').text.strip()
    elif parameter == JobField.TAGS:
        # NOTE: this seems a bit flimsy, monster allows a lot of flex. here
        assert job._raw_scrape_data
        tags = []  # type: List[str]
        for li in job._raw_scrape_data.find_all(
                'section', attrs={'class': 'summary-section'}):
            table_key = li.find('dt')
            # Only keep sections whose <dt> label is a known side-panel
            # tag entry (compared stripped and lower-cased).
            if (table_key and table_key.text.strip().lower()
                    in MONSTER_SIDEPANEL_TAG_ENTRIES):
                table_value = li.find('dd')
                if table_value:
                    tags.append(table_value.text.strip())
        # Store the collected tags on the job; previously the list was
        # built and then dropped, so this branch had no effect.
        job.tags = tags
    else:
        raise NotImplementedError(f"Cannot set {parameter.name}")
def set(self, parameter: JobField, job: Job, soup: BeautifulSoup) -> None:
    """Populate one attribute of a job, selected by JobField

    NOTE: RAW requests job.url through self.session, so callers should
    space these calls out (respectfully delayed). DESCRIPTION is then read
    from the page stored by RAW.

    Raises NotImplementedError for any other JobField.
    """
    if parameter == JobField.RAW:
        # Download the job page and keep the parsed document on the job.
        page = self.session.get(job.url)
        job._raw_scrape_data = BeautifulSoup(
            page.text, self.config.bs4_parser)
        return

    if parameter == JobField.DESCRIPTION:
        # The RAW step must already have stored the parsed page.
        assert job._raw_scrape_data
        container = job._raw_scrape_data.find(id='JobDescriptionContainer')
        job.description = container.text.strip()
        return

    raise NotImplementedError(f"Cannot set {parameter.name}")
def set(self, parameter: JobField, job: Job, soup: BeautifulSoup) -> None:
    """Populate one attribute of a job, selected by JobField

    NOTE: URL is high-priority, since RAW fetches job.url; DESCRIPTION in
    turn is read from the page that RAW stores on the job.

    Raises NotImplementedError for any other JobField.
    """
    if parameter == JobField.RAW:
        # Download the job page and keep the parsed document on the job.
        page = self.session.get(job.url)
        job._raw_scrape_data = BeautifulSoup(
            page.text, self.config.bs4_parser)
        return

    if parameter == JobField.DESCRIPTION:
        # The RAW step must already have stored the parsed page.
        assert job._raw_scrape_data
        description_node = job._raw_scrape_data.find(id='jobDescriptionText')
        job.description = description_node.text.strip()
        return

    if parameter == JobField.URL:
        # The view-job link is built from the job key and configured domain.
        assert job.key_id
        domain = self.config.search_config.domain
        job.url = (f"http://www.indeed.{domain}/"
                   f"viewjob?jk={job.key_id}")
        return

    raise NotImplementedError(f"Cannot set {parameter.name}")