def scrape_single_position_job(self, job: Job, job_element: WebElement):
    """Fill ``job`` from a job entry that lists exactly one position.

    The company name is read from the ``pv-entity__secondary-title`` element
    of the entry's summary block and passed through
    ``canonize_company_name``.  One ``Position`` is built (title from the
    ``t-16`` element, date range and location via the scraping helpers) and
    appended to ``job``.

    Args:
        job: Job record to populate; mutated in place.
        job_element: Root element of the job entry on the page.
    """
    summary = self.find_summary_element(
        job_element, 'pv-entity__summary-info')

    company_node = summary.find_element_by_class_name(
        'pv-entity__secondary-title')
    job.company.name = self.canonize_company_name(company_node.text)

    role = Position()
    title_node = summary.find_element_by_class_name('t-16')
    role.title = title_node.text.strip()
    # Date range and location are looked up from the whole job entry,
    # not just from its summary block.
    role.date_range = self.scrape_date_range(job_element)
    role.location = self.scrape_location(job_element)
    job.add_position(role)
def scrape_multi_position_job(self, job: Job, job_element: WebElement):
    """Fill ``job`` from a job entry that groups several positions.

    Sets the company name and total duration from the entry's company
    summary block, then appends one ``Position`` per role item found in the
    ``pv-entity__position-group`` list.

    Args:
        job: Job record to populate; mutated in place.
        job_element: Root element of the job entry on the page.

    Raises:
        IndexError: If a heading holds fewer than two ``<span>`` elements.
    """

    def second_span_text(parent, heading_tag):
        # Headings are read as a pair of spans, with the value in the
        # second one, e.g.:
        #   <h3 class="t-16 t-black t-bold">
        #     <span class="visually-hidden">Company Name</span>
        #     <span>University of Colorado Boulder</span>
        #   </h3>
        heading = parent.find_element_by_tag_name(heading_tag)
        spans = heading.find_elements_by_tag_name('span')
        return spans[1].text

    company_summary = self.find_summary_element(
        job_element, 'pv-entity__company-summary-info')

    job.company.name = self.canonize_company_name(
        second_span_text(company_summary, 'h3'))
    # NOTE(review): the <h4> is presumably laid out like the <h3> above
    # (label span, then value span) -- confirm against the live markup.
    job.total_duration = second_span_text(company_summary, 'h4').strip()

    # <ul class="pv-entity__position-group mt2">
    #   <li class="pv-entity__position-group-role-item"> ... </li>
    # </ul>
    role_list = job_element.find_element_by_class_name(
        'pv-entity__position-group')
    role_items = role_list.find_elements_by_class_name(
        'pv-entity__position-group-role-item')

    for role_item in role_items:
        role = Position()

        # <div class="pv-entity__summary-info-v2
        #             pv-entity__summary-info--background-section
        #             pv-entity__summary-info-margin-top mb2">
        role_summary = role_item.find_element_by_class_name(
            'pv-entity__summary-info--background-section')

        # <h3 class="t-14"> <span>Title</span> <span>Web Designer</span>
        role.title = second_span_text(role_summary, 'h3').strip()
        role.date_range = self.scrape_date_range(role_summary)
        role.location = self.scrape_location(role_summary)
        role.duration = self.scrape_duration(role_summary)
        job.add_position(role)