示例#1
0
 def set(self, parameter: JobField, job: Job, soup: BeautifulSoup) -> None:
     """Set a single job attribute, selected by JobField.

     NOTE: priority is: HIGH: RAW, LOW: DESCRIPTION / TAGS

     Supported fields:
         RAW: download ``job.url`` with ``self.session`` and store the
             parsed page on ``job._raw_scrape_data``.
         WAGE: read the wage text from the stored page, if present.
         DESCRIPTION: read the text of the element with
             ``id='JobDescription'`` from the stored page.
         TAGS: collect side-panel values whose key is listed in
             ``MONSTER_SIDEPANEL_TAG_ENTRIES`` and store them on
             ``job.tags``.

     Args:
         parameter: the field to populate.
         job: the job that is updated in place.
         soup: not used by this method; every field is read from
             ``job._raw_scrape_data`` or fetched from ``job.url``.

     Raises:
         NotImplementedError: if ``parameter`` is any other field.
     """
     if parameter == JobField.RAW:
         job._raw_scrape_data = BeautifulSoup(
             self.session.get(job.url).text, self.config.bs4_parser)
     elif parameter == JobField.WAGE:
         # Wage is optional: leave job.wage untouched when either the
         # outer cell or its nested div is missing.
         pot_wage_cell = job._raw_scrape_data.find(
             'div', attrs={'class': 'col-xs-12 cell'})
         if pot_wage_cell:
             pot_wage_value = pot_wage_cell.find('div')
             if pot_wage_value:
                 job.wage = pot_wage_value.text.strip()
     elif parameter == JobField.DESCRIPTION:
         assert job._raw_scrape_data
         job.description = job._raw_scrape_data.find(
             id='JobDescription').text.strip()
     elif parameter == JobField.TAGS:
         # NOTE: this seems a bit flimsy, monster allows a lot of flex. here
         assert job._raw_scrape_data
         tags = []  # type: List[str]
         for li in job._raw_scrape_data.find_all(
                 'section', attrs={'class': 'summary-section'}):
             table_key = li.find('dt')
             if (table_key and table_key.text.strip().lower()
                     in MONSTER_SIDEPANEL_TAG_ENTRIES):
                 table_value = li.find('dd')
                 if table_value:
                     tags.append(table_value.text.strip())
         # Store the result on the job; previously the collected list was
         # discarded, so setting TAGS had no effect.
         job.tags = tags
     else:
         raise NotImplementedError(f"Cannot set {parameter.name}")
示例#2
0
 def set(self, parameter: JobField, job: Job, soup: BeautifulSoup) -> None:
     """Populate one attribute of ``job``, chosen by ``parameter``.

     NOTE: Description has to get and should be respectfully delayed

     Supported fields:
         RAW: download ``job.url`` with ``self.session`` and store the
             parsed page on ``job._raw_scrape_data``.
         DESCRIPTION: read the text of the element with
             ``id='JobDescriptionContainer'`` from the stored page.

     Args:
         parameter: the field to populate.
         job: the job that is updated in place.
         soup: not used by this method.

     Raises:
         NotImplementedError: if ``parameter`` is any other field.
     """
     if parameter == JobField.RAW:
         # Fetch the job's own page and keep the parsed markup on the job.
         page_text = self.session.get(job.url).text
         job._raw_scrape_data = BeautifulSoup(
             page_text, self.config.bs4_parser)
         return

     if parameter == JobField.DESCRIPTION:
         # The description is read from the page stored by the RAW step.
         assert job._raw_scrape_data
         container = job._raw_scrape_data.find(id='JobDescriptionContainer')
         job.description = container.text.strip()
         return

     raise NotImplementedError(f"Cannot set {parameter.name}")
示例#3
0
 def set(self, parameter: JobField, job: Job, soup: BeautifulSoup) -> None:
     """Populate one attribute of ``job``, chosen by ``parameter``.

     NOTE: URL is high-priority, since we need it to get RAW.

     Supported fields:
         RAW: download ``job.url`` with ``self.session`` and store the
             parsed page on ``job._raw_scrape_data``.
         DESCRIPTION: read the text of the element with
             ``id='jobDescriptionText'`` from the stored page.
         URL: build the view-job link from ``job.key_id`` and the
             configured search domain.

     Args:
         parameter: the field to populate.
         job: the job that is updated in place.
         soup: not used by this method.

     Raises:
         NotImplementedError: if ``parameter`` is any other field.
     """
     if parameter == JobField.RAW:
         # Fetch the job's own page and keep the parsed markup on the job.
         page_text = self.session.get(job.url).text
         job._raw_scrape_data = BeautifulSoup(
             page_text, self.config.bs4_parser)
         return

     if parameter == JobField.DESCRIPTION:
         # The description is read from the page stored by the RAW step.
         assert job._raw_scrape_data
         description_node = job._raw_scrape_data.find(id='jobDescriptionText')
         job.description = description_node.text.strip()
         return

     if parameter == JobField.URL:
         # The link is derived purely from the job key and the domain.
         assert job.key_id
         domain = self.config.search_config.domain
         job.url = f"http://www.indeed.{domain}/viewjob?jk={job.key_id}"
         return

     raise NotImplementedError(f"Cannot set {parameter.name}")