def _duration_amount(value):
    """Return *value* as a float, or ``None`` when no number can be read.

    Numeric values (and numeric strings such as ``'1.5'``) are converted
    directly so a decimal point survives.  Anything else falls back to
    concatenating the digits found in ``str(value)``, e.g. ``'3 years'``
    becomes ``3.0``.
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        digits = ''.join(filter(str.isdigit, str(value)))
        return float(digits) if digits else None


def durationo(p_word):
    """Store the duration described by *p_word* in the global ``course_data``.

    Writes ``course_data['Duration']`` and ``course_data['Duration_Time']``
    and finally blanks ``course_data['Full_Time']`` / ``['Part_Time']``.
    Returns nothing.

    The text is matched case-insensitively in this priority order:
    ``year`` > ``month`` > ``week`` > ``day``.  When none of them occurs, when
    *p_word* is empty, or when ``DurationConverter.convert_duration`` yields
    nothing usable, both duration fields are set to ``''``.

    Args:
        p_word: raw duration text (anything ``str()`` accepts) or a falsy
            value when the page had no duration.
    """
    # NOTE(review): the original layout was collapsed onto one line, so it is
    # not certain whether the Full_Time / Part_Time resets at the end ran on
    # every call (as written here) or only in the "no text" branch -- confirm
    # against the original file.
    text = str(p_word).lower() if p_word else ''
    amount, unit = '', ''

    if any(keyword in text for keyword in ('year', 'month', 'week', 'day')):
        converted = DurationConverter.convert_duration(p_word)
        # presumably an (amount, unit) pair; a falsy result means "nothing found"
        parsed = _duration_amount(converted[0]) if converted else None
        if parsed is not None:
            amount = parsed
            if 'year' in text:
                unit = converted[1]
                if amount == 1:
                    # Singular label for exactly one year.
                    unit = 'Year'
                elif 'month' in str(unit).lower():
                    # Converter reported months for a text that mentions years.
                    unit = 'Months'
            elif 'month' in text or 'week' in text:
                # 'month' (not only 'months') so that "1 month" is recognised.
                unit = converted[1]
            else:
                unit = 'Days'

    course_data['Duration'] = amount
    course_data['Duration_Time'] = unit
    course_data['Full_Time'] = ""
    course_data['Part_Time'] = ""
# Fragment of a flat scraper script (layout collapsed onto one line; it stops
# part-way through the 'month' branch).
# What the visible code does:
#   * prints the City / Offline values already stored in course_data;
#   * finds the <dt class="info-group-title__ duration"> whose text matches
#     'Duration' and reads the text of the following <dd> through tag_text();
#   * when that text contains 'year', runs DurationConverter.convert_duration()
#     and stores Duration / Duration_Time ('Year' when the amount prints as 1.0,
#     otherwise 'Years'); the 'month' branch starts the same way.
# NOTE(review): float(''.join(filter(str.isdigit, str(value_conv)))[0]) joins
#   every digit found in str(value_conv) and then keeps only character [0], so
#   just the first digit reaches float() (a leading "10" becomes 1.0) and a
#   result without digits raises IndexError -- confirm this is intended.
# NOTE(review): duration is a float, so str(duration) can be '1.0' but never
#   '1' or '1.00'; those two comparisons can never match.
print('no career outcomes listed') print('CITY: ', course_data['City']) print('OFFLINE: ', course_data['Offline']) # DURATION, PART-TIME, FULL TIME dt1 = soup.find('dt', class_='info-group-title__ duration', text=re.compile('Duration', re.IGNORECASE)) if dt1: dd1 = dt1.find_next('dd') if dd1: duration_raw = tag_text(dd1) p_word = duration_raw if 'year' in p_word.__str__().lower(): value_conv = DurationConverter.convert_duration(p_word) duration = float(''.join(filter(str.isdigit, str(value_conv)))[0]) duration_time = 'Years' if str(duration) == '1' or str(duration) == '1.00' or str( duration) == '1.0': duration_time = 'Year' course_data['Duration'] = duration course_data['Duration_Time'] = duration_time print('DURATION + DURATION TIME: ', duration, duration_time) # print(tag_text(div_span)) elif 'month' in p_word.__str__().lower(): value_conv = DurationConverter.convert_duration(p_word) duration = float(''.join(filter(str.isdigit, str(value_conv)))[0]) duration_time = 'Months'
# Fragment of a flat scraper script (layout collapsed onto one line).  It opens
# inside the AttributeError handler of an earlier try (not visible here) and
# stops in the middle of a condition.
# What the visible code does:
#   * blanks course_data['Career_Outcomes/path'] in that handler;
#   * selects the duration <p> with a positional CSS selector, converts each
#     match with DurationConverter.convert_duration() and stores element [0] as
#     Duration and [1] as Duration_Time (a later match overwrites an earlier one);
#   * on TypeError stores 'N/A' for both fields;
#   * appends the last raw text `dur` to avaialbility_list and scans that list
#     for 'full-time'.
# NOTE(review): `dur` is bound only inside the for loop; if select() matches
#   nothing, avaialbility_list.append(dur) would hit an unbound name (assuming
#   the append sits after the try/except -- indentation is lost here).
# NOTE(review): `if avaialbility_list:` inside `for data in avaialbility_list`
#   is always true, because the loop body only runs for a non-empty list.
except AttributeError: course_data['Career_Outcomes/path'] = "" #print("No career Outcomes for", course_data['Course']) #DURATION/Duration Time/FullTime/Parttime/ avaialbility_list = [] try: duration = soup.select( "div.course-overview__info:nth-child(3) > div:nth-child(2) > p:nth-child(1)" ) for t in duration: dur = tag_text(t) # print(dur) course_duration = DurationConverter.convert_duration((dur)) # print(course_duration) course_data['Duration'] = course_duration[0] course_data['Duration_Time'] = course_duration[1] except TypeError: course_data['Duration'] = 'N/A' course_data['Duration_Time'] = 'N/A' # print(course_data['Duration']) # print(course_data['Duration_Time']) avaialbility_list.append(dur) for data in avaialbility_list: if avaialbility_list: if data.lower().find('full-time') != -1 and data.lower().find(
# DURATION
# Walk from the "Duration" <h4> to the body-top <div> of the same fast-facts
# tile.  Any missing link in the chain raises AttributeError, and a converter
# result that cannot be indexed/compared raises TypeError; both are reported by
# the handler below and leave the fields written so far untouched.
try:
    duration_tag = (
        soup.find('h4', text=re.compile('Duration', re.IGNORECASE))
        .find_parent('div', {'class': 'course-fast-facts__tile__header'})
        .find_next_sibling('div', {'class': 'course-fast-facts__tile__body'})
        .find('div', {'course-fast-facts__tile__body-top'})
    )
    if duration_tag:
        duration_raw = tag_text(duration_tag)
        duration_lower = duration_raw.lower()

        # Study-mode flags are only ever switched on here, never cleared.
        if 'part-time' in duration_lower or 'part time' in duration_lower:
            course_data['Part_Time'] = 'Yes'
        if 'full time' in duration_lower or 'full-time' in duration_lower:
            course_data['Full_Time'] = 'Yes'

        duration = DurationConverter.convert_duration(duration_raw)
        # Store the converter's values first, then adjust the unit label.
        course_data['Duration'] = duration[0]
        course_data['Duration_Time'] = duration[1]

        duration_is_single = duration[0] < 2
        duration_unit = duration[1].lower()
        # Label priority: 'Weeks' overrides 'Year', which overrides 'Month'.
        if 'week' in duration_unit:
            course_data['Duration_Time'] = 'Weeks'
        elif duration_is_single and 'year' in duration_unit:
            course_data['Duration_Time'] = 'Year'
        elif duration_is_single and 'month' in duration_unit:
            course_data['Duration_Time'] = 'Month'
except (AttributeError, TypeError):
    print('some error occurred while trying to decode duration.')
print('DURATION + DURATION TIME: ', course_data['Duration'],
      course_data['Duration_Time'])
# Fragment of a flat scraper script (layout collapsed onto one line).  It opens
# inside an if/elif chain over part_time_tag .. part_time_tag5 (start not
# visible) and stops on a bare `try:`.
# What the visible code does:
#   * for the matching part-time tag, sets Part_Time to 'Yes' and keeps the
#     tag text, lower-cased with the 'part-time:' prefix removed, in part_time;
#   * on AttributeError / IndexError prints err.__traceback__ (this prints the
#     traceback object itself, not a formatted stack trace);
#   * if full_time is set and does not mention 'hours', replaces 'trimester'
#     with 'semester', converts it with DurationConverter.convert_duration()
#     and stores element [0] as Duration and [1] as Duration_Time;
#   * then relabels the unit: 'Month' / 'Year' when the amount is below 2, and
#     'Weeks' whenever the unit mentions 'week' (Duration itself is re-assigned
#     the same value each time).
# NOTE(review): only TypeError is caught around the conversion, so a
#   non-string unit (no .lower()) would propagate -- confirm the converter
#   always returns a string unit.
course_data['Part_Time'] = 'Yes' part_time = tag_text(part_time_tag4).lower().replace( 'part-time:', '') print('part-time duration: ', part_time) elif not part_time_tag and not part_time_tag2 and not part_time_tag3 and not part_time_tag4 and part_time_tag5: course_data['Part_Time'] = 'Yes' part_time = tag_text(part_time_tag5).lower().replace( 'part-time:', '') print('part-time duration: ', part_time) except (AttributeError, IndexError) as err: print(err.__traceback__) try: if full_time is not None and 'hours' not in full_time: duration = DurationConverter.convert_duration( full_time.replace('trimester', 'semester')) course_data['Duration'] = duration[0] course_data['Duration_Time'] = duration[1] if duration[0] < 2 and 'month' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Month' if duration[0] < 2 and 'year' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Year' if 'week' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Weeks' except TypeError: print('type error with trimester again') try:
# Fragment of a flat scraper script (layout collapsed onto one line).  It opens
# inside the AttributeError handler of an earlier try (not visible here) and
# stops on an open `if`.
# What the visible code does:
#   * blanks course_data['Career_Outcomes/path'] in that handler;
#   * in <table class="course-info"> finds the <p class="hidden-print"> whose
#     text matches 'Duration:', takes the next <td>, converts its text with
#     DurationConverter.convert_duration() and stores element [0] as Duration
#     and [1] as Duration_Time;
#   * on any exception stores 'N/A' for both fields;
#   * appends the raw text course_detail2 to avaialbility_list and iterates it.
# NOTE(review): `except TypeError` and `except Exception` have identical
#   bodies; TypeError is a subclass of Exception, so the first clause is
#   redundant.
# NOTE(review): if the lookup fails before course_detail2 is assigned, the
#   later avaialbility_list.append(course_detail2) would hit an unbound name
#   (assuming the append sits after the try/except -- indentation is lost).
except AttributeError: course_data['Career_Outcomes/path'] = "" #print("No career Outcomes for", course_data['Course']) # DURATION/Duration Time/FullTime/Parttime/ avaialbility_list = [] try: table = soup.find('table', attrs={'class': 'course-info'}) course_detail = table.find( 'p', class_='hidden-print', text=re.compile('Duration:')).find_next('td') course_detail2 = tag_text(course_detail) conv = DurationConverter.convert_duration((course_detail2)) course_data['Duration'] = conv[0] course_data['Duration_Time'] = conv[1] except TypeError: course_data['Duration'] = 'N/A' course_data['Duration_Time'] = 'N/A' except Exception: course_data['Duration'] = 'N/A' course_data['Duration_Time'] = 'N/A' #print(course_data['Duration']) avaialbility_list.append(course_detail2) for data in avaialbility_list: if avaialbility_list:
# Fragment of a flat scraper script (layout collapsed onto one line; the
# nesting of the first statements cannot be recovered from here).
# What the visible code does:
#   * takes the sibling <div class="slide js-acc-hidden"> of a_tag, reads each
#     <p> through tag_text(), strips newlines and stores the text in
#     course_data['Career_Outcomes/path'] -- the key holds a single string, so
#     only one paragraph's text survives;
#   * selects the duration <strong> with a positional CSS selector, converts
#     its right-stripped text with DurationConverter.convert_duration() and
#     stores element [0] as Duration and [1] as Duration_Time;
#   * on any exception stores 'N/A' for both fields.
# NOTE(review): `except TypeError` and `except Exception` have identical
#   bodies; TypeError is a subclass of Exception, so the first clause is
#   redundant.
career_outcome_div = a_tag.find_next_sibling('div', class_='slide js-acc-hidden') p_list = career_outcome_div.find_all('p') for c in p_list: c_outcome = tag_text(c).replace('\n', '') course_data['Career_Outcomes/path'] = c_outcome #print(c_outcome) # DURATION/Duration Time/FullTime/Parttime/ avaialbility_list = [] try: duration = soup.select('.fz-16 > li:nth-child(2) > div:nth-child(2) > div:nth-child(1) > p:nth-child(1) > strong:nth-child(1)') for dur in duration: course_detail = dur.text.rstrip() course_detail2 = DurationConverter.convert_duration(course_detail) # print(course_detail2) course_data['Duration'] = course_detail2[0] course_data['Duration_Time'] = course_detail2[1] except TypeError: course_data['Duration'] = 'N/A' course_data['Duration_Time'] = 'N/A' except Exception: course_data['Duration'] = 'N/A' course_data['Duration_Time'] = 'N/A' # print(course_data['Duration']) # print(course_data['Duration_Time'])
# Fragment of a flat scraper script (layout collapsed onto one line; it stops
# on an open `if title_parent:`).
# What the visible code does:
#   * sets Course_Lang to 'English' and appends 'carlton' to actual_cities;
#   * inside <div class="page-details"> takes the third 'course-detail' <div>
#     (enumerate index 2), reads the text of its <a> and converts it with
#     dura.convert_duration() (`dura` is defined outside this fragment --
#     presumably a DurationConverter; verify);
#   * when the result is not None, copies it into a list, switches 'Years' /
#     'Months' to the singular form when the amount equals 1, and stores the
#     pair in Duration / Duration_Time;
#   * starts locating the 'Core Subjects' accordion for the units section.
# NOTE(review): duration.find('a') is dereferenced without a None check, so a
#   detail block without a link would raise AttributeError in the visible code.
course_data['Course_Lang'] = 'English' print('COURSE LANGUAGE: ', course_data['Course_Lang']) # CITY actual_cities.append('carlton') # DURATION dura_tag = soup.find('div', class_='page-details') if dura_tag: details_list = dura_tag.find_all('div', class_='course-detail') if details_list: for i, duration in enumerate(details_list): if i == 2: dura_ = duration.find('a') dura_text = dura_.get_text().strip() converted_dura = dura.convert_duration(dura_text) if converted_dura is not None: duration_l = list(converted_dura) if duration_l[0] == 1 and 'Years' in duration_l[1]: duration_l[1] = 'Year' if duration_l[0] == 1 and 'Months' in duration_l[1]: duration_l[1] = 'Month' course_data['Duration'] = duration_l[0] course_data['Duration_Time'] = duration_l[1] print('COURSE DURATION: ', str(duration_l[0]) + ' / ' + duration_l[1]) # UNITS subject_title = soup.find('h4', class_='title', text=re.compile('Core Subjects', re.IGNORECASE)) if subject_title: title_parent = subject_title.find_parent('div', class_='c-accordion') if title_parent:
# Fragment of a flat scraper script (layout collapsed onto one line; it stops
# in the middle of a soup.find() call).
# What the visible code does:
#   * prints desc_list;
#   * finds the <span> whose text matches 'Duration:' and lower-cases the text
#     of its next sibling <span>;
#   * sets Full_Time to 'yes' in both branches and Part_Time to 'yes' only when
#     the text contains 'part-time' (otherwise 'no');
#   * converts the text with dura.convert_duration() (`dura` is defined outside
#     this fragment -- presumably a DurationConverter; verify); when the result
#     is not None, switches 'Years' / 'Months' to the singular form for an
#     amount equal to 1 and stores the pair in Duration / Duration_Time.
# NOTE(review): the 'Years' / 'Months' checks are case-sensitive while the
#   text handed to the converter was lower-cased; this only works if the
#   converter returns capitalised unit names -- confirm.
print('DESCRIPTION: ', desc_list) # DURATION duration_title = soup.find('span', text=re.compile('Duration:', re.IGNORECASE)) if duration_title: duration = duration_title.find_next_sibling('span') if duration: duration_text = duration.get_text().lower() if 'part-time' in duration_text: course_data['Part_Time'] = 'yes' course_data['Full_Time'] = 'yes' else: course_data['Part_Time'] = 'no' course_data['Full_Time'] = 'yes' converted_duration = dura.convert_duration(duration_text) if converted_duration is not None: duration_l = list(converted_duration) if duration_l[0] == 1 and 'Years' in duration_l[1]: duration_l[1] = 'Year' if duration_l[0] == 1 and 'Months' in duration_l[1]: duration_l[1] = 'Month' course_data['Duration'] = duration_l[0] course_data['Duration_Time'] = duration_l[1] print('COURSE DURATION: ', str(duration_l[0]) + ' / ' + duration_l[1]) print('FULL-TIME/PART-TIME: ', course_data['Full_Time'] + ' / ' + course_data['Part_Time']) # STUDY MODE studyMode_title = soup.find('span',
# Fragment of a flat scraper script (layout collapsed onto one line).  It opens
# inside a loop body within a try block (start not visible).
# What the visible code does:
#   * records an ATAR prerequisite (Prerequisite_1 / Prerequisite_1_grade_1)
#     and breaks out of the enclosing loop;
#   * reads <span id="course-overview-length" class="course-overview">,
#     converts its text with DurationConverter.convert_duration() and stores
#     element [0] as Duration and [1] as Duration_Time;
#   * relabels the unit: 'Month' / 'Year' when the amount is below 2, and
#     'Weeks' whenever the unit mentions 'week';
#   * on AttributeError / TypeError prints a notice, then prints both fields.
# NOTE(review): print(e.stacktrace, e.__cause__) -- built-in exceptions have no
#   `stacktrace` attribute, so this handler would itself raise AttributeError
#   unless the caught object defines one; e.__traceback__ (or the traceback
#   module) is probably what was meant.
course_data['Prerequisite_1'] = 'ATAR' course_data[ 'Prerequisite_1_grade_1'] = atar_score break except (AttributeError, IndexError, TypeError) as e: print(e.stacktrace, e.__cause__) # DURATION try: duration_tag = soup.find('span', { 'id': 'course-overview-length', 'class': 'course-overview' }) if duration_tag: duration_text = tag_text(duration_tag) duration = DurationConverter.convert_duration(duration_text) course_data['Duration'] = duration[0] course_data['Duration_Time'] = duration[1] if duration[0] < 2 and 'month' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Month' if duration[0] < 2 and 'year' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Year' if 'week' in duration[1].lower(): course_data['Duration'] = duration[0] course_data['Duration_Time'] = 'Weeks' except (AttributeError, TypeError): print('no info on duration/full/part time mode') print('DURATION + DURATION TIME: ', course_data['Duration'], course_data['Duration_Time'])
# Fragment of a flat scraper script (layout collapsed onto one line; the elif
# chain is cut off after the 'month' branch assigns Duration).
# What the visible code does:
#   * derives Mode_of_Study / Full_Time / Part_Time from 'full-time' and
#     'part-time' markers in temp_value_var2 ('Full Time / Part Time' when both
#     are present);
#   * for `value` containing 'year' (else 'month') runs
#     DurationConverter.convert_duration(), picks 'Year' / 'Month' when the
#     amount prints as 1.0 and the plural otherwise, and stores the result.
# NOTE(review): float(''.join(filter(str.isdigit, str(value_conv)))[0]) joins
#   every digit found in str(value_conv) and then keeps only character [0], so
#   just the first digit reaches float() (a leading "10" becomes 1.0) and a
#   result without digits raises IndexError -- confirm this is intended.
# NOTE(review): duration is a float, so str(duration) can be '1.0' but never
#   '1' or '1.00'; those two comparisons can never match.
# NOTE(review): `'year' not in value.lower()` in the elif is always true there,
#   because the preceding `if` already handled every text containing 'year'.
# MODE OF STUDY (PART_TIME/FULL_TME) ====================================== if 'full-time' in temp_value_var2: course_data['Mode_of_Study'] = 'Full Time' course_data['Full_Time'] = 'Yes' if 'part-time' in temp_value_var2: course_data['Mode_of_Study'] = 'Part Time' course_data['Part_Time'] = 'Yes' if 'full-time' in temp_value_var2 and 'part-time' in temp_value_var2: course_data['Mode_of_Study'] = 'Full Time / Part Time' print('MODE OF STUDY: ', course_data['Mode_of_Study']) # DURATION + DURATION_TIME ============================================= print('Current Duration: ', value) if 'year' in value.lower(): value_conv = DurationConverter.convert_duration(value) duration = float(''.join(filter(str.isdigit, str(value_conv)))[0]) duration_time = 'Years' if str(duration) == '1' or str(duration) == '1.00' or str(duration) == '1.0': duration_time = 'Year' print('FILTERED DURATION + DURATION_TIME: ' + str(duration) + ' ' + duration_time) course_data['Duration'] = duration course_data['Duration_Time'] = duration_time elif 'month' in value.lower() and 'year' not in value.lower(): value_conv = DurationConverter.convert_duration(value) duration = float(''.join(filter(str.isdigit, str(value_conv)))[0]) duration_time = 'Months' if str(duration) == '1' or str(duration) == '1.00' or str(duration) == '1.0': duration_time = 'Month' print('FILTERED DURATION + DURATION_TIME: ' + str(duration) + ' ' + duration_time) course_data['Duration'] = duration
# Fragment of a flat scraper script (layout collapsed onto one line).  It opens
# on the `else:` of an earlier statement (not visible) and stops inside the
# fallback branch.
# What the visible code does:
#   * sets Availability to 'D' in that else branch and ignores AttributeError;
#   * looks for a <p> whose text matches a 'Full-time: <digit> ...' pattern;
#     when found, passes its text and course_data to
#     DurationConverter.convert_duration_cleanly(), rebinds course_data to the
#     returned value, and sets Full_Time / Part_Time to 'Yes' or '' depending
#     on the markers present in the text;
#   * otherwise retries with the same pattern without the colon.
# NOTE(review): in '^.*Full-time: [\d].*.years|months|weeks|semesters.*.{0,40}$'
#   the `|` operators split the whole pattern, so any paragraph that merely
#   contains 'months' or 'weeks' matches even without 'Full-time'; grouping the
#   units, e.g. (?:years|months|weeks|semesters), is probably what was meant.
# NOTE(review): the pattern is not a raw string, so '\d' relies on Python
#   passing the unknown escape through (newer versions warn about it).
else: course_data['Availability'] = 'D' except AttributeError: pass # DURATION try: duration_tag = soup.find( 'p', text=re.compile( '^.*Full-time: [\d].*.years|months|weeks|semesters.*.{0,40}$', re.IGNORECASE)) if duration_tag: tagged_duration = tag_text(duration_tag) print('duration so far: ', tag_text(duration_tag)) course_data = DurationConverter.convert_duration_cleanly( tagged_text=tagged_duration, course_dict=course_data) course_data[ 'Full_Time'] = 'Yes' if 'full time' in tagged_duration.lower( ) or 'full-time' in tagged_duration.lower() else '' course_data[ 'Part_Time'] = 'Yes' if 'part time' in tagged_duration.lower( ) or 'part-time' in tagged_duration.lower() else '' else: duration_tag = soup.find( 'p', text=re.compile( '^.*Full-time [\d].*.years|months|weeks|semesters.*.{0,40}$', re.IGNORECASE)) if duration_tag: tagged_duration = tag_text(duration_tag) print('duration so far: ', tag_text(duration_tag))
print('LEVEL CODE: ', course_data['Level_Code'])

# DECIDE THE FACULTY
# There is no early exit: when keywords of several faculties occur in the
# course name, the faculty iterated last is the one that is kept.
for i in faculty_key:
    for j in faculty_key[i]:
        if j.lower() in course_data['Course'].lower():
            course_data['Faculty'] = i
print('FACULTY: ', course_data['Faculty'])

# DURATION TEXT
# Resolve the value cell one step at a time.  soup.find() / find_parent()
# return None when nothing matches, so chaining the calls directly raised
# AttributeError before the `if duration_td:` guard could ever run.
duration_label = soup.find('strong',
                           text=re.compile('Duration', re.IGNORECASE))
duration_label_cell = (duration_label.find_parent('td')
                       if duration_label is not None else None)
duration_td = (duration_label_cell.find_next('td')
               if duration_label_cell is not None else None)
if duration_td:
    duration_data = tag_text(duration_td)
    duration = DurationConverter.convert_duration(duration_data)
    # Other call sites in this file guard against a None / non-indexable
    # converter result, so treat None as "no duration found" here as well.
    if duration is not None:
        course_data['Duration'] = duration[0]
        course_data['Duration_Time'] = duration[1]
        duration_unit = duration[1].lower()
        # Singular labels for amounts below 2.
        if duration[0] < 2 and 'month' in duration_unit:
            course_data['Duration_Time'] = 'Month'
        if duration[0] < 2 and 'year' in duration_unit:
            course_data['Duration_Time'] = 'Year'
        # NOTE(review): anything above 48 months is recorded as exactly
        # 4 Years regardless of the real length -- confirm this cap is wanted.
        if duration[0] > 48 and 'month' in duration_unit:
            course_data['Duration'] = 4
            course_data['Duration_Time'] = 'Years'
    print('duration so far: ', duration)
print('DURATION + DURATION TIME: ', course_data['Duration'],
      course_data['Duration_Time'])
# DURATION
# Read the first 'row' <div> inside <div id="IntakesTable">.  A missing element
# raises AttributeError and a converter result that cannot be indexed/compared
# raises TypeError; both are reported by the handler below and leave the fields
# written so far untouched.
try:
    duration_ = (
        soup.find('div', {'id': 'IntakesTable'})
        .find('div', {'class': 'row'})
    )
    if duration_:
        duration = tag_text(duration_)
        print('duration so far: ', duration)
        duration_lower = duration.lower()

        # Study-mode flags are only ever switched on here, never cleared.
        if 'part-time' in duration_lower or 'part time' in duration_lower:
            course_data['Part_Time'] = 'Yes'
        if 'full time' in duration_lower or 'full-time' in duration_lower:
            course_data['Full_Time'] = 'Yes'

        # Only the text after the first ':' is converted; 'yrs' is spelled out
        # as 'years' before the split.
        head, sep, tail = duration.replace('yrs', 'years').partition(':')
        duration = DurationConverter.convert_duration(tail)
        # Store the converter's values first, then adjust the unit label.
        course_data['Duration'] = duration[0]
        course_data['Duration_Time'] = duration[1]

        duration_is_single = duration[0] < 2
        duration_unit = duration[1].lower()
        # Label priority: 'Weeks' overrides 'Year', which overrides 'Month'.
        if 'week' in duration_unit:
            course_data['Duration_Time'] = 'Weeks'
        elif duration_is_single and 'year' in duration_unit:
            course_data['Duration_Time'] = 'Year'
        elif duration_is_single and 'month' in duration_unit:
            course_data['Duration_Time'] = 'Month'
except (AttributeError, TypeError):
    print('no info on duration/full/part time mode')
print('DURATION + DURATION TIME: ', course_data['Duration'],
      course_data['Duration_Time'])