EC.presence_of_all_elements_located((By.XPATH, f'{THE_XPATH}'))) value = browser.find_element_by_xpath(f'{THE_XPATH}').text course_data['Career_Outcomes'] = value except (AttributeError, TimeoutException, NoSuchElementException, ElementNotInteractableException): print('cant extract outcomes') # DURATION try: THE_XPATH = "(//*[text()='FULL TIME'][1]/following::*[1])[1]" WebDriverWait(browser, delay).until( EC.presence_of_all_elements_located((By.XPATH, f'{THE_XPATH}'))) value = browser.find_element_by_xpath(f'{THE_XPATH}').text course_data['Full_Time'] = 'Yes' duration = convert_duration( value.replace('trimester', 'semester').replace('yrs', 'years')) course_data['Duration'] = duration[0] course_data['Duration_Time'] = duration[1] if duration[0] < 2 and 'month' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Month' if duration[0] < 2 and 'year' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Year' if 'week' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Weeks' except (AttributeError, TypeError, TimeoutException, NoSuchElementException, ElementNotInteractableException) as e: print('cant extract full time duration')
except NoSuchElementException: html_ = browser.page_source print('got duration source') soup_ = bs4.BeautifulSoup(html_, 'lxml') duration_tag = soup_.find('li', {'id': 'course-overview-duration'}) if duration_tag: duration = tag_text(duration_tag) print(f'duration so far: {duration}') if 'part time' in duration.lower( ) or 'part-time' in duration.lower(): course_data['Part_Time'] = 'Yes' if 'full time' in duration.lower( ) or 'full-time' in duration.lower(): course_data['Full_Time'] = 'Yes' duration = convert_duration( duration.replace('trimester', 'semester')) course_data['Duration'] = duration[0] course_data['Duration_Time'] = duration[1] if duration[0] < 2 and 'month' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Month' if duration[0] < 2 and 'year' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Year' if 'week' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Weeks' else: html_ = browser.page_source print('got duration source') soup_ = bs4.BeautifulSoup(html_, 'lxml')