def inside_course(count, course_name, link):
    """Scrape one course page whose years are nested accordion panels.

    Fetches ``link`` with ``simple_link`` and appends one
    ``course(count, course_name, link, year, subject)`` entry to ``clist``
    per module.  Panels are labelled "Year 1", "Year 2", ... in document
    order.  A panel without list items contributes the paragraph that
    follows its ``<h3>`` instead.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.

    The element lookups are unguarded, so a page with a different layout
    raises instead of being skipped.
    """
    page = simple_link(link)
    body = page.find('body')
    container = (body.find('div', class_='site-container-md')
                 .find('main')
                 .find('div')
                 .find('div', class_='skin-bg-module-light')
                 .find('div', class_='tk-row anchor-link-scroll'))
    panels = (container.find('div', id='section2')
              .find('div', class_='accordion-body')
              .find_all('div', class_='accordion-body'))
    for number, panel in enumerate(panels, start=1):
        # Build the label fresh each time: slicing "Year N" back to four
        # characters also removed the space and produced "Year2", "Year3".
        year = "Year " + str(number)
        items = panel.find('ul').find_all('li')
        if items:
            for item in items:
                subject = item.find('span', class_='module-title').text
                clist.append(course(count, course_name, link, year, subject))
        else:
            # Record the paragraph itself; the old code appended whatever
            # ``subject`` happened to hold from an earlier panel.
            subject = panel.find('h3').find_next('p').text
            clist.append(course(count, course_name, link, year, subject))
    print('\n')
def inside_course(count, cc, link):
    """Scrape a course page laid out as a table of alternating rows.

    The page is fetched with ``javascript_link``.  Table rows are consumed
    in pairs: the first cell of the first row is the year label, the second
    row holds that year's modules.  One ``course(count, cc, link, year,
    subject)`` entry is appended to ``clist`` per module.

    Args:
        count: Course number, stored unchanged in every entry.
        cc: Course name, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = javascript_link(link)
    body = page.find('body')
    main = (body.find('section', class_='visible-body')
            .find('section', class_='page-level page-content')
            .find('div', class_='wrapper')
            .find('section', id='page-content-main'))
    rows = (main.find('section', id='main-content')
            .find('div', class_='field-items')
            .find('div', class_='field-item odd')
            .find('table')
            .find('tbody')
            .find_all('tr'))
    # Pair row 0 with row 1, row 2 with row 3, ...; a trailing unpaired
    # row is ignored, exactly as the index-based loop did.
    for heading_row, module_row in zip(rows[0::2], rows[1::2]):
        year = heading_row.find('td').text
        try:
            outer_list = module_row.find('td').find('ul')
            try:
                # Preferred layout: modules are the children of a nested list.
                for child in outer_list.find('ul'):
                    subject = child.text
                    clist.append(course(count, cc, link, year, subject))
            except Exception:
                # No usable nested list: take the items of the outer list.
                for item in outer_list.find_all('li'):
                    subject = item.text
                    clist.append(course(count, cc, link, year, subject))
        except Exception:
            # No list at all: the row carries a single paragraph.
            # ``Exception`` rather than a bare except, so Ctrl-C still works.
            subject = module_row.find('p').text
            clist.append(course(count, cc, link, year, subject))
    print('\n')
def inside_category(count, course_name, link):
    """Scrape a course page whose years are tabs inside ``#modules``.

    Each tab label (``<li>`` text) is used as the year.  The matching
    ``content panel`` supplies the modules: every ``js-expandmore`` block
    inside its ``moduleBlock``, or, failing that, the panel's first
    paragraph.  One ``course(...)`` entry is appended to ``clist`` per
    module.

    Args:
        count: Course number; for course number 1 scraping stops at the
            first tab that has no module block.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    row = (body.find('form')
           .find('div', class_='offcanvas-wrap')
           .find('div', class_='offcanvas-content')
           .find('div', id='content')
           .find('div', id='modules')
           .find('div', class_='row'))
    tabs = row.find('div', class_='column large-12').find('div', class_='modulesTabs')
    panels = tabs.find_all('div', class_='content panel')
    labels = tabs.find('ul').find_all('li')
    for index, label in enumerate(labels):
        year = label.text
        print(year)
        try:
            blocks = (panels[index]
                      .find('div', class_='moduleBlock')
                      .find_all('div', class_='js-expandmore'))
            for block in blocks:
                subject = block.text
                clist.append(course(count, course_name, link, year, subject))
        except Exception:
            if count == 1:
                break
            try:
                subject = panels[index].find('p').text
                # Was ``cc``, which is not defined in this function: the
                # NameError was swallowed and the fallback never recorded
                # anything.
                clist.append(course(count, course_name, link, year, subject))
            except Exception:
                break
def inside_course(count, course_name, link):
    """Scrape a course page made of one ``<section>`` per year.

    The page is fetched with ``javascript_link``.  Each section's ``<h3>``
    is the year label; its modules are the ``<li>`` items of its first list,
    or its first paragraph when there is no list.  One ``course(...)`` entry
    is appended to ``clist`` per module.

    Args:
        count: Course number; course number 3 is always read from the
            paragraph rather than from a list.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = javascript_link(link)
    body = page.find('body')
    block = (body.find('main')
             .find('div', class_='container mx-auto px-4 pb-12 min-h-screen')
             .find('div', class_='lg:flex')
             .find('article')
             .find('section', class_='block'))
    for section in block.find_all('section'):
        year = section.find('h3').text
        print(year)
        try:
            if count != 3:
                for item in section.find('ul').find_all('li'):
                    subject = item.text
                    clist.append(
                        course(count, course_name, link, year, subject))
                    print(subject)
            else:
                subject = section.find('p').text
                clist.append(course(count, course_name, link, year, subject))
                print(subject)
        except Exception:
            # Best-effort fallback to the paragraph; ``Exception`` instead
            # of a bare except so KeyboardInterrupt is not swallowed.
            subject = section.find('p').text
            clist.append(course(count, course_name, link, year, subject))
            print(subject)
        print('\n')
def inside_master(count, course_name, link):
    """Scrape the module list of a course page that has no year split.

    Looks for ``<ul>`` lists inside the third ``content-type--main`` block;
    if there are none, falls back to the toggle blocks inside every
    ``js-tabcontent`` panel.  Entries are appended to ``clist`` with the
    placeholder years ``"---"`` (list layout) or ``"--"`` (toggle layout).
    Any failure records a single ``"Null"`` / ``"Null"`` entry.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    try:
        main_blocks = body.find_all(
            'div', class_='content-type content-type--main')
        lists = main_blocks[2].find_all('ul')
        if lists:
            for module_list in lists:
                for item in module_list.find_all('li'):
                    subject = item.text
                    print(subject)
                    clist.append(
                        course(count, course_name, link, "---", subject))
        else:
            for panel in body.find_all('div', class_='js-tabcontent'):
                toggles = panel.find_all(
                    'div', class_='content-type content-type--toggle')
                for toggle in toggles:
                    subject = toggle.find('span').text
                    print(subject)
                    clist.append(
                        course(count, course_name, link, "--", subject))
    except Exception:
        # Deliberately best-effort, but no longer a bare except, so the run
        # can still be interrupted from the keyboard.
        print("Null")
        print("NULL")
        clist.append(course(count, course_name, link, "Null", "Null"))
def inside_course(count, course_name, link):
    """Scrape a course page whose years are tab panels containing tables.

    For each ``crs-dtl-content-tabs-panel`` the ``<h4>`` text is the year
    and every table row with a ``left-col`` cell contributes one module
    (whitespace-stripped).  Rows without that cell are skipped.  If the page
    structure is not as expected, a ``"Null"`` / ``"Null"`` entry is
    appended to ``clist`` instead; entries appended before the failure stay.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    try:
        panels = body.find(
            'div', class_='bcu-tabs-panel man pam secondary-bg-grey-light-3'
        ).find_all('div', class_='crs-dtl-content-tabs-panel')
        for panel in panels:
            year = panel.find('h4', class_='hide-on-desktop mtl mbm').text
            print(year)
            rows = panel.find('table', class_='mvn plain-table').find_all('tr')
            for row in rows:
                try:
                    cell = row.find('td').find(
                        'div', class_='left-col inline-block size14of16')
                    subject = cell.text.strip()
                    print(' ' + subject)
                    clist.append(
                        course(count, course_name, link, year, subject))
                except Exception:
                    # Header/spacer rows have no module cell.
                    continue
            print('\n')
    except Exception:
        # ``Exception`` (not bare) so Ctrl-C is not mistaken for a bad page.
        clist.append(course(count, course_name, link, "Null", "Null"))
        print("Year : Null" + '\n' + "Subject : Null" + '\n')
def inside_category(count, course_name, link):
    """Scrape a course page with ``yearTabs``; fall back to ``<h4>`` headings.

    Main layout: every ``tabContent`` panel is labelled "Year 1", "Year 2",
    ... in document order.  Modules come from the list in the panel's
    ``brief`` section, or, when that section has no list, from the links in
    the ``modules`` table (first row skipped), or finally from the ``brief``
    paragraph.

    Fallback layout (when the main one raises): walk the ``<h4>`` headings
    of ``column-block`` and record the paragraph after each heading that
    starts with "Year", stopping at the first heading that does not.

    One ``course(...)`` entry is appended to ``clist`` per module.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    try:
        panels = body.find('section', class_='tabs yearTabs').find_all(
            'div', class_='tabContent')
        for number, panel in enumerate(panels, start=1):
            # Built fresh per panel: slicing the label back to four
            # characters lost the space and yielded "Year2", "Year3", ...
            year = "Year " + str(number)
            brief_list = panel.find('section', class_='brief').find('ul')
            print(year)
            try:
                if brief_list is None:
                    rows = (panel.find('section', class_='modules')
                            .find('table').find('tbody').find_all('tr'))
                    for row in rows[1:]:
                        subject = row.find('td').find('a').text
                        clist.append(
                            course(count, course_name, link, year, subject))
                        print(' ' + subject)
                else:
                    for item in brief_list.find_all('li'):
                        subject = item.text
                        clist.append(
                            course(count, course_name, link, year, subject))
                        print(' ' + subject)
            except Exception:
                subject = panel.find('section', class_='brief').find('p').text
                clist.append(course(count, course_name, link, year, subject))
                print(subject)
            print('\n')
    except Exception:
        # ``Exception`` instead of a bare except keeps Ctrl-C working.
        block = body.find('div', class_='column-block')
        for heading in block.find_all('h4'):
            year = heading.text
            if not year.startswith("Year"):
                break
            print(year)
            subject = heading.find_next('p').text
            clist.append(course(count, course_name, link, year, subject))
            print(subject)
def inside_module(llist):
    """Scrape every course page in ``llist`` and record its modules.

    Course names are taken from ``nlist`` at the same position as the link.
    Each page is first fetched with ``simple_link``; if walking to the tab
    container raises, the page is fetched again with ``javascript_link`` and
    the ``responsive-tabs--enabled`` variant of the container is used.

    For every ``<div>`` inside the container the nearest preceding ``<h3>``
    is the year; modules are the ``<li>`` items, or the first paragraph when
    there is no list.  Entries are appended to ``clist`` with the zero-based
    position of the course as their number.

    Args:
        llist: Iterable of course page URLs, parallel to ``nlist``.
    """
    column_class = 'col-xs-12 col-sm-12 col-md-12 u-p-bottom--3 u-p-right--0'
    for count, url in enumerate(llist):
        page = simple_link(url)
        body = page.find('body')
        course_name = nlist[count]
        print(str(count + 1) + " " + course_name + " " + url)
        try:
            block = (body.find('main')
                     .find('div', class_=column_class)
                     .find('div', class_='course-block'))
            tabs = block.find('div', class_='responsive-tabs vertical-tabs')
        except Exception:
            # ``Exception`` rather than bare, so Ctrl-C is not treated as
            # "static page unusable".
            page = javascript_link(url)
            body = page.find('body')
            block = (body.find('main')
                     .find('div', class_=column_class)
                     .find('div', class_='course-block'))
            tabs = block.find(
                'div',
                class_='responsive-tabs vertical-tabs responsive-tabs--enabled')
        for panel in tabs.find_all('div'):
            year = panel.find_previous('h3').text
            print(year)
            try:
                for item in panel.find('ul').find_all('li'):
                    subject = item.text
                    print(' ' + subject)
                    clist.append(course(count, course_name, url, year, subject))
            except Exception:
                try:
                    subject = panel.find('p').text
                    print(' ' + subject)
                    clist.append(course(count, course_name, url, year, subject))
                except Exception:
                    # Neither a list nor a paragraph: stop with this course.
                    break
        print('\n')
def inside_course(count, course_name, link):
    """Scrape a course page whose year panels embed their modules in iframes.

    The page is fetched with ``javascript_link``.  For each accordion panel
    the ``<h4>`` of the heading is the year.  When the panel body contains
    an ``<iframe>``, its document is fetched once with ``simple_link`` and
    read in this order of preference:

    1. a table: the second cell of every row after the first (stopping at
       the first row without one);
    2. the first paragraph;
    3. the items of the first list.

    If there is no iframe, or all three attempts fail, the first paragraph
    of the panel itself is used.  One ``course(...)`` entry is appended to
    ``clist`` per module.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = javascript_link(link)
    body = page.find('body')
    panels = body.find('div', id='course-content-accordion').find_all(
        'div', class_='panel panel-default')
    for panel in panels:
        year = panel.find('div', class_='panel-heading collapsed').find('h4').text
        print(year)
        content = panel.find('div', class_='panel-collapse collapse')
        try:
            iframe_url = "https:" + content.find('iframe').attrs['src']
            # Fetched once and reused by all three layouts below; previously
            # each fallback downloaded the same document again.
            frame_body = simple_link(iframe_url).find('body')
            try:
                rows = frame_body.find('table').find_all('tr')
                for row in rows[1:]:
                    try:
                        subject = row.find_all('td')[1].text
                        clist.append(
                            course(count, course_name, link, year, subject))
                    except Exception:
                        break
                    print(subject)
            except Exception:
                try:
                    subject = frame_body.find('p').text
                    print(subject)
                    clist.append(
                        course(count, course_name, link, year, subject))
                except Exception:
                    for item in frame_body.find('ul').find_all('li'):
                        subject = item.text
                        print(subject)
                        clist.append(
                            course(count, course_name, link, year, subject))
        except Exception:
            # ``Exception`` instead of bare excepts so the run stays
            # interruptible.
            subject = content.find('p').text
            print(subject)
            clist.append(course(count, course_name, link, year, subject))
        print('\n')
def category(html_page):
    """Walk the course tables of the index page and dispatch each course.

    Every row after the first of each ``datatable`` increments the global
    ``count`` and is routed by its link:

    * course number 218: site-relative link, scraped with ``inside_course``;
    * absolute (``https``) links: postgraduate pages go to
      ``master_course``, a fixed set of courses is recorded as
      ``"Null"`` / ``"Null"``, undergraduate pages go to ``inside_course``
      and anything else to ``inside_course_list``;
    * all other links are treated as site-relative and scraped with
      ``inside_course``.

    For postgraduate, undergraduate and site-relative links, "overview" in
    the URL is replaced by "module-details" before scraping.  Any exception
    while handling a row skips that row.

    Args:
        html_page: Parsed index page exposing ``find`` / ``find_all``.
    """
    global count
    baseUrl = 'https://www.liverpool.ac.uk'
    backUrl = 'module-details'
    # Courses recorded as "Null" without being scraped.  Compared against
    # the stripped name: the old chain spelled one title with a trailing
    # space ('Catalan (Honours Select) '), which could never match a clean
    # title.
    no_module_courses = {
        'Applied English BA (Hons)',
        'Basque (Honours Select)',
        'Catalan (Honours Select)',
        'Chinese (Honours Select)',
        'Combined Honours',
        'Criminology and Security BA (Hons)',
        'Game Design BA (Hons)',
        'Honours Select',
        'Italian (Honours Select)',
        'Joint Honours',
        'Portuguese (Honours Select)',
        'Social Policy (Honours Select)',
        'Spanish (Honours Select)',
        'Veterinary Conservation Medicine BSc',
        'Veterinary Conservation Medicine Intercalated Honours BSc',
    }
    body = html_page.find('body')
    for table in body.find_all('table', class_='datatable'):
        rows = table.find_all('tr')
        for row in rows[1:]:
            try:
                count += 1
                anchor = row.find('td', class_='course-name').find('a')
                link = anchor.attrs['href']
                course_name = row.find('td', class_='course-name').find('a').text
                if count == 218:
                    print(str(count) + " " + course_name + " " + link)
                    link = baseUrl + link
                    inside_course(count, course_name, link)
                elif 'https' in link:
                    print(str(count) + " " + course_name + " " + link)
                    if 'postgraduate' in link:
                        link = link.replace("overview", backUrl)
                        master_course(course_name, link)
                    elif course_name.strip() in no_module_courses:
                        year = "Null"
                        subject = "Null"
                        print(year)
                        print(subject)
                        clist.append(
                            course(count, course_name, link, year, subject))
                    elif 'undergraduate' in link:
                        link = link.replace("overview", backUrl)
                        print(str(count) + " " + course_name + " " + link)
                        inside_course(count, course_name, link)
                    else:
                        inside_course_list(count, course_name, link)
                else:
                    # Site-relative link: make it absolute, then point it at
                    # the module-details page.
                    link = (baseUrl + link).replace("overview", backUrl)
                    print(str(count) + " " + course_name + " " + link)
                    inside_course(count, course_name, link)
            except Exception:
                # Best-effort: a row that cannot be handled is skipped.
                # ``Exception`` (not bare) so the crawl can be interrupted.
                continue
        print('\n')
def inside_course(count, course_name, links):
    """Scrape the "course outline" tab of a course profile page.

    The page is fetched with ``javascript_link``.  Every ``<h2>`` of the
    outline after the first is treated as a year heading.  For the heading
    at position ``i`` the function first tries the ``i``-th list found under
    the first ``<h2>``; when that fails it records the ``i``-th paragraph of
    the outline instead.  One ``course(...)`` entry is appended to ``clist``
    per module.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        links: URL of the course page.
    """
    page = javascript_link(links)
    body = page.find('body')
    node = (body.find('div', class_='campl-row campl-content campl-recessed-content')
            .find('div', class_='campl-wrap clearfix')
            .find('div', class_='campl-column6 campl-main-content')
            .find('div', class_='node node-course-profile clearfix'))
    tabs = (node.find('div', class_='content campl-content-container')
            .find('div',
                  class_='field-group-htabs-wrapper group-container-college field-group-htabs')
            .find('div', class_='horizontal-tabs clearfix'))
    outline = (tabs.find('div', class_='horizontal-tabs-panes horizontal-tabs-processed')
               .find('fieldset', id='course-outline')
               .find('div', class_='field-item even'))
    headings = outline.find_all('h2')
    print(str(count) + " " + course_name)
    for index in range(1, len(headings)):
        year = headings[index].text
        try:
            # NOTE(review): this searches for <ul> elements nested inside the
            # first <h2>; confirm that is intended rather than the lists of
            # the outline container itself.
            lists = outline.find('h2').find_all('ul')
            for item in lists[index].find_all('li'):
                subject = item.text
                clist.append(course(count, course_name, links, year, subject))
        except Exception:
            # ``Exception`` rather than bare so Ctrl-C is not swallowed.
            paragraphs = outline.find_all('p')
            subject = paragraphs[index].text
            clist.append(course(count, course_name, links, year, subject))
            print(year + " " + subject)
    print('\n')
def inside_course(count, course_code, course_name, link):
    """Scrape the "degree structure" tabs of a programme page.

    Each tab label is the year.  In the matching content ``<div>`` the
    second paragraph is inspected: if it is empty the modules are the list
    items of the tab; otherwise the paragraph's HTML is split on ``<br/>``
    and every piece becomes a module.  When the tab has no second paragraph
    the list items are used, and when there is no list either a single
    "Year Abroad" entry is recorded.  Entries are appended to ``clist``.

    Args:
        count: Course number, stored unchanged in every entry.
        course_code: Accepted for the caller's convenience; not used here.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the programme page.
    """
    page = simple_link(link)
    body = page.find('body')
    main = (body.find('div', class_='site-content wrapper')
            .find('div', class_='site-content__inner clearfix')
            .find('div', class_='site-content__body')
            .find('div', class_='site-content__main'))
    tabs_box = (main.find('div', class_='prospectus--programme')
                .find('section', id='degree-structure')
                .find('div', class_='collapse__content')
                .find('div', class_='tabs'))
    labels = tabs_box.find('ul').find_all('li')
    for index, label in enumerate(labels):
        year = label.find('a').text
        tabs = tabs_box.find('div', class_='tabs__content island').find_all('div')
        try:
            paragraph = tabs[index].find_all('p')[1]
            if len(paragraph) == 0:
                for item in tabs[index].find('ul').find_all('li'):
                    subject = item.text
                    clist.append(course(count, course_name, link, year, subject))
            else:
                # Modules are separated by <br/> inside a single paragraph.
                pieces = str(paragraph).split('<br/>')
                pieces[0] = pieces[0].replace('<p>', '')
                pieces[-1] = pieces[-1].replace('</p>', '')
                for piece in pieces:
                    clist.append(course(count, course_name, link, year, piece))
        except Exception:
            try:
                print(year)
                for item in tabs[index].find('ul').find_all('li'):
                    subject = item.text
                    clist.append(course(count, course_name, link, year, subject))
            except Exception:
                # ``Exception`` instead of bare excepts keeps Ctrl-C working.
                subject = "Year Abroad"
                clist.append(course(count, course_name, link, year, subject))
def inside_category(count, course_name, link):
    """Scrape a course page; course number 97 uses a different layout.

    The page is fetched with ``javascript_link``.

    * ``count == 97``: every ``tabs-panel`` contributes its first paragraph,
      labelled "Year 1", "Year 2", ... in document order.
    * otherwise: every table inside ``#modules`` contributes the link text
      of every second row, starting with the first.  A table preceded by an
      "Optional modules:" paragraph reuses the nearest preceding ``<h4>`` as
      its year; any other table takes the next unused ``<h4>``.

    One ``course(...)`` entry is appended to ``clist`` per module.

    Args:
        count: Course number; selects the layout and is stored in entries.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = javascript_link(link)
    body = page.find('body')
    if count == 97:
        row = (body.find('div', class_='dialog-off-canvas-main-canvas')
               .find('div', class_='off-canvas-content')
               .find('main')
               .find('section')
               .find('div', class_='row page-body'))
        panels = row.find('div', class_='single-course-modules').find_all(
            'div', class_='tabs-panel')
        # The counter used to stay at 1 and the label lost its space after
        # the first panel ("Year 1", "Year1", "Year1", ...); number the
        # panels explicitly instead.
        for number, panel in enumerate(panels, start=1):
            year = "Year " + str(number)
            subject = panel.find('p').text
            clist.append(course(count, course_name, link, year, subject))
    else:
        modules = body.find('div', id='courseDetailsPage').find('div', id='modules')
        headings = modules.find_all('h4')
        next_heading = 0
        for table in modules.find_all('table'):
            if table.find_previous('p').text == "Optional modules:":
                year = table.find_previous('h4').text
            else:
                year = headings[next_heading].text
                next_heading += 1
            rows = table.find('tbody').find_all('tr')
            for row in rows[::2]:
                subject = row.find('a').text
                clist.append(course(count, course_name, link, year, subject))
    print('\n')
def inside_course(count, course_name, link):
    """Scrape a course page built from one "card" per year.

    For every ``single-item`` card the ``<header>`` text is the year and the
    list items are the modules; if reading them raises, the card's first
    paragraph is recorded instead.  One ``course(...)`` entry is appended to
    ``clist`` per module.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    cards = page.find(
        'div', class_='flex-wrapper pinned-items text-only action-list'
    ).find_all('div', class_='card standard-card single-item')
    for card in cards:
        try:
            year = card.find('header').text
            print(year)
            for item in card.find('ul').find_all('li'):
                subject = item.text
                print(' ' + subject)
                clist.append(course(count, course_name, link, year, subject))
        except Exception:
            # ``Exception`` rather than bare, so Ctrl-C is not swallowed.
            # NOTE(review): if the header lookup is what failed, ``year``
            # still holds the previous card's value (or is unbound on the
            # first card) - confirm the intended label for that case.
            subject = card.find('p').text
            print(' ' + subject)
            clist.append(course(count, course_name, link, year, subject))
def inside_course(count, course_name, link):
    """Scrape the ``#course-tab-3`` tab of a course page.

    If the tab contains lists, each list is one year ("Year 1", "Year 2",
    ... in document order) and its items are the modules.  Otherwise every
    ``<h6>`` is a module whose year is the nearest preceding ``<h5>``.
    One ``course(...)`` entry is appended to ``clist`` per module.  If the
    tab cannot be read, a "Problem with Course ..." line is printed and
    nothing is recorded.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    container = (body.find('div', class_='id7-fixed-width-container')
                 .find('main')
                 .find('div', class_='id7-main-content')
                 .find('div', class_='column-1-content')
                 .find('div', class_='container-text'))
    try:
        tab = container.find('div', class_='hp-layout').find('div', id='course-tab-3')
        lists = tab.find_all('ul')
        if lists:
            for number, module_list in enumerate(lists, start=1):
                # Built fresh per list: slicing the old label back to four
                # characters dropped the space and produced "Year2", ...
                year = "Year " + str(number)
                print(year)
                for item in module_list.find_all('li'):
                    subject = item.text
                    clist.append(
                        course(count, course_name, link, year, subject))
                print('\n')
        else:
            for heading in tab.find_all('h6'):
                year = heading.find_previous('h5').text
                subject = heading.text
                clist.append(course(count, course_name, link, year, subject))
    except Exception:
        # ``Exception`` instead of a bare except keeps Ctrl-C working.
        print("Problem with Course " + course_name + " " + link)
def inside_course(count, course_name, link):
    """Scrape a course page whose years are accordions with module tables.

    The page is fetched with ``javascript_link``.  The accordions live in
    the ``col-md-10 offset-md-1`` column of ``content-toggle``, or in the
    ``col-md-8`` column when the former is missing.  For each accordion the
    nearest preceding ``<h3>`` is the year and every table row contributes
    the link text of its first cell.

    As soon as one accordion cannot be read that way, the function switches
    to the headings of the column: each ``<h3>`` is a year and the second
    paragraph after it is the module text; processing then stops.

    One ``course(...)`` entry is appended to ``clist`` per module.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = javascript_link(link)
    body = page.find('body')
    try:
        column = body.find('main').find('div', class_='content-toggle').find(
            'div', class_='col-md-10 offset-md-1')
        accordions = column.find_all('div', class_='accordion')
    except Exception:
        column = body.find('main').find('div', class_='content-toggle').find(
            'div', class_='col-md-8')
        accordions = column.find_all('div', class_='accordion')
    for accordion in accordions:
        try:
            rows = accordion.find('table', class_='table').find('tbody').find_all('tr')
            year = accordion.find_previous('h3').text
            print(year)
            for row in rows:
                subject = row.find('td').find('a').text
                clist.append(course(count, course_name, link, year, subject))
                print(' ' + subject)
        except Exception:
            # ``column`` is the same element the old code looked up again
            # here (same two-step lookup on the same tree), so it is reused.
            for heading in column.find_all('h3'):
                year = heading.text
                print(year)
                subject = heading.find_next('p').find_next('p').text
                clist.append(course(count, course_name, link, year, subject))
                print(subject)
            break
    print('\n')
def master_course(course_name, link):
    """Record every ``<h5>`` module heading of a course page.

    The first ``<h2>`` of the ``content`` section is used as the year for
    all modules.  The global ``count`` is incremented once per module and
    the incremented value is stored in that module's ``clist`` entry.

    Args:
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    global count
    body = simple_link(link).find('body')
    details = body.find('section', class_='content').find(
        'section', id='module-details')
    headings = details.find('div').find_all('h5')
    year = body.find('section', class_='content').find('h2').text
    print(year)
    for heading in headings:
        count += 1
        subject = heading.text
        print(subject)
        clist.append(course(count, course_name, link, year, subject))
def inside_course(count, course_name, link):
    """Scrape a course page whose years are ``module-year`` accordions.

    The page is fetched with ``javascript_link``.  Inside the active tab
    pane of the module list, each accordion's card header ``<h5>`` is the
    year and the first cell of every table row is a module.  If the module
    list cannot be read, one ``"null"`` / ``"null"`` entry is appended to
    ``clist``; entries appended before the failure stay.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = javascript_link(link)
    body = page.find('body')
    main = (body.find('div', class_='dialog-off-canvas-main-canvas')
            .find('div', class_='layout-container')
            .find('main'))
    section = (main.find('div', class_='layout-content')
               .find('article')
               .find('section', class_='scrollspy-wrapper position-relative py-0')
               .find('div', class_='bg-white section-padding section--triangle z-3'))
    header_class = ('card-header border border-deep-cerulean border-right-0 '
                    'border-bottom-0 border-left-0 p-0')
    try:
        pane = (section.find('div', class_='body-content')
                .find_all('div')[2]
                .find('div', class_='module-list')
                .find('div', class_='tab-content')
                .find('div', class_='tab-pane active'))
        for accordion in pane.find_all('div', class_='module-year accordion'):
            rows = (accordion.find('div', class_='card')
                    .find_all('div')[1]
                    .find('table').find('tbody').find_all('tr'))
            year = (accordion.find('div', class_='card')
                    .find('div', class_=header_class)
                    .find('h5').text)
            print(year)
            for row in rows:
                subject = row.find('td').text
                clist.append(course(count, course_name, link, year, subject))
            print('\n')
    except Exception:
        # ``Exception`` rather than bare so Ctrl-C is not recorded as "null".
        clist.append(course(count, course_name, link, "null", "null"))
def inside_course(count, course_name, link):
    """Scrape the ``#module-details`` sections of a course page.

    The first matching section is skipped.  In each remaining section the
    ``<h4>`` is the year (sections without one are skipped) and every list
    item is a module: its ``<h5>`` text when present, otherwise the whole
    item text.  One ``course(...)`` entry is appended to ``clist`` per
    module.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    sections = body.find('div', class_='course-detail').find_all(
        'section', id='module-details')
    for section in sections[1:]:
        try:
            year = section.find('h4').text
        except Exception:
            continue
        print(year)
        for item in section.find('div').find('ul').find_all('li'):
            try:
                subject = item.find('h5').text
                print(' ' + subject)
                clist.append(course(count, course_name, link, year, subject))
            except Exception:
                # ``Exception`` rather than bare so Ctrl-C still interrupts.
                subject = item.text
                print(subject)
                clist.append(course(count, course_name, link, year, subject))
def inside_module(count, course_name, link):
    """Scrape a course page whose years are tabs holding accordions.

    The ``<li>`` labels of the ``tabs`` section are the years; the
    ``tabs__content`` block at the same position holds one
    ``accordion__section`` per module, whose button title is the module
    name.  When that layout cannot be read, the first paragraph of the
    sub-page's ``wysiwyg`` block is recorded under the year "Year - ";
    when that fails too, a ``"Null"`` / ``"Null"`` entry is recorded.
    Entries are appended to ``clist``.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    tabs = body.find('section', class_='tabs')
    try:
        labels = tabs.find('ul').find_all('li')
        contents = tabs.find_all('div', class_='tabs__content')
        # Indexed on purpose: a label without a matching content block must
        # raise and trigger the fallback below, not be skipped silently.
        for index, label in enumerate(labels):
            year = label.text
            print(year)
            sections = contents[index].find_all('div', class_='accordion__section')
            for section in sections:
                title = (section.find('div', class_='accordion__header')
                         .find('h2')
                         .find('span', class_='accordion__button-title'))
                subject = title.text
                print(' ' + subject)
                clist.append(course(count, course_name, link, year, subject))
    except Exception:
        try:
            paragraph = (body.find('article', class_='page__course_subpage course')
                         .find('div', class_='page__content')
                         .find('div', class_='wysiwyg')
                         .find('p'))
            year = "Year - "
            print(year)
            subject = paragraph.text
            print(' ' + subject)
            clist.append(course(count, course_name, link, year, subject))
        except Exception:
            # ``Exception`` instead of bare excepts keeps Ctrl-C working.
            year = "Null"
            print(year)
            subject = "Null"
            print(' ' + subject)
            clist.append(course(count, course_name, link, year, subject))
def category(html_page):
    """Walk the A-Z course list and scrape every course in it.

    Courses are numbered from 1 in list order.  Each link is made absolute
    against ``https://www.exeter.ac.uk`` and given the ``#course-content``
    fragment.  The course named "Foundation" and the international year one
    page are recorded in ``clist`` as ``"Null"`` / ``"Null"``; every other
    course is passed to ``inside_course``.

    Args:
        html_page: Parsed index page exposing ``find``.
    """
    base_url = 'https://www.exeter.ac.uk'
    fragment = '#course-content'
    skipped_link = 'https://www.exeter.ac.uk/undergraduate/courses/foundation/internationalyearone/#course-content'
    listing = html_page.find('body').find('div', id='all-courses-A-Z')
    entries = listing.find('ul', class_='course-list').find_all('li')
    for count, entry in enumerate(entries, start=1):
        course_name = entry.find('a').text
        link = base_url + entry.find('a').attrs['href'] + fragment
        print(str(count) + " " + course_name + " " + link)
        has_modules = not (course_name == 'Foundation' or link == skipped_link)
        if has_modules:
            inside_course(count, course_name, link)
        else:
            clist.append(course(count, course_name, link, "Null", "Null"))
def inside_subject(count, course_name, link):
    """Scrape a course page whose years are ``<div>`` blocks in a blue box.

    Every ``<div>`` found under the first child ``<div>`` of the
    ``box box-blue`` container is labelled "Year 1", "Year 2", ... in
    document order, and every item of every list inside it is a module.
    One ``course(...)`` entry is appended to ``clist`` per module.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    blocks = body.find('div', class_='box box-blue').find('div').find_all('div')
    for number, block in enumerate(blocks, start=1):
        # Built fresh per block: resetting the label with ``year[0:4]`` cut
        # off the space, so every year after the first read "Year2", ...
        year = "Year " + str(number)
        print(year)
        for module_list in block.find_all('ul'):
            for item in module_list.find_all('li'):
                subject = item.text
                print(' ' + subject)
                clist.append(course(count, course_name, link, year, subject))
        print('\n')
def inside_category(count, course_name, link):
    """Scrape a course page whose years are ``js-tabcontent`` panels.

    Panels are labelled "Year 1", "Year 2", ... in document order.  Each
    ``content-type--toggle`` block inside a panel is a module, named by the
    text of its first ``<span>``.  One ``course(...)`` entry is appended to
    ``clist`` per module.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    panels = body.find('div', class_='js-tabs tabs').find_all(
        'div', class_='js-tabcontent')
    for number, panel in enumerate(panels, start=1):
        # Built fresh per panel: resetting the label with ``year[0:4]`` cut
        # off the space, so every year after the first read "Year2", ...
        year = "Year " + str(number)
        print(year)
        toggles = panel.find_all('div', class_='content-type content-type--toggle')
        for toggle in toggles:
            subject = toggle.find('span').text
            print(' ' + subject)
            clist.append(course(count, course_name, link, year, subject))
def category(html_page):
    """Walk the course index table and scrape every linked course.

    All links in the cells of the table's first row are numbered from 1 in
    document order.  Links are protocol-relative, so "https:" is prepended.
    Courses number 7, 36 and 40 are not scraped: they are reported as
    problem pages and recorded in ``clist`` as ``"null"`` / ``"null"``.
    Every other course is passed to ``inside_course``.

    Args:
        html_page: Parsed index page exposing ``find``.
    """
    problem_pages = (7, 36, 40)
    main = (html_page.find('body')
            .find('section', class_='visible-body')
            .find('section', class_='page-level page-content')
            .find('div', class_='wrapper')
            .find('section', id='page-content-main'))
    cells = (main.find('section', id='main-content')
             .find('table')
             .find('tbody')
             .find('tr')
             .find_all('td'))
    counter = 0
    for cell in cells:
        for anchor in cell.find_all('a'):
            counter += 1
            link = "https:" + anchor.attrs['href']
            cc = anchor.text
            print(str(counter) + " " + cc + " " + link)
            if counter in problem_pages:
                print(str(counter) + " " + 'problem with page ' + cc + " " + link)
                clist.append(course(counter, cc, link, "null", "null"))
            else:
                inside_course(counter, cc, link)
def inside_course(count, cc, link):
    """Scrape a course profile page that has one table per year.

    Tables inside ``course-profile-content`` are labelled "Year 1",
    "Year 2", ... in document order; the first cell of every body row is a
    module.  One ``course(...)`` entry is appended to ``clist`` per module.

    Args:
        count: Course number, stored unchanged in every entry.
        cc: Course name, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    container = (body.find('div', class_='pageWrapper')
                 .find('article')
                 .find('div', class_='row tripleVertPadding')
                 .find('div', class_='oneColLayoutContainer')
                 .find('div', class_='mainContentContainer'))
    profile = (container.find('div', class_='corporate')
               .find('div', class_='course-profile-content'))
    for number, table in enumerate(profile.find_all('table'), start=1):
        # Built fresh per table: resetting the label with ``year[0:4]`` cut
        # off the space, so every year after the first read "Year2", ...
        year = "Year " + str(number)
        for row in table.find('tbody').find_all('tr'):
            subject = row.find('td').text
            clist.append(course(count, cc, link, year, subject))
    print('\n')
#print(inside_module) #item = inside_module.find_all('div', class_='item') print(count.__str__() + ": " + coures_code + " " + course_title + " " + dept + " " + duration + " ") for i in inside_module.find_all('div', class_='item'): year = i.find('h3').text ul = i.find('ul') try: xx = ul.find_all('li') for j in ul.find_all('li'): cc = j.text clist.append( course(count, course_title, link, year, cc)) #print(" " + year + " " + course) except: p = i.find('p') cc = p.text clist.append(course(count, course_title, link, year, cc)) #print(" " + year + " " + course) print('\n') else: clist.append(course(count, course_title, link, "Null", "Null")) print("Proble with page " + count.__str__() + "\n") wb = Workbook() file_path = 'C:\\Users\\jatin\\Desktop\\imperial.xlsx'
def inside_course(count, course_name, link):
    """Scrape a course page, trying three page layouts in turn.

    1. Tabbed layout: each ``panel`` has an ``<h3>`` year and a list; every
       ``<div>`` inside a list item is a module named by its ``<h3>``.  A
       panel with an empty list is recorded with the subject "NULL".
    2. Accordion layout: the second ``course-section`` supplies a year from
       its accordion header and modules from ``<strong>`` inside list
       paragraphs.
    3. Flat layout: ``<p class="module">`` elements of the first
       ``course-section`` are recorded under "About"; if there are none,
       the list items of the ``course-section study`` block are recorded
       under "Research".

    Each layout is attempted only when the previous one raises; entries
    appended before the failure stay.  If all three fail, one ``"Null"`` /
    ``"Null"`` entry is recorded.  Entries are appended to ``clist``.

    Args:
        count: Course number, stored unchanged in every entry.
        course_name: Course title, stored unchanged in every entry.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    try:
        panels = (body.find('div', class_='tab-container')
                  .find('div', class_='tabs flex-tabs')
                  .find_all('div', class_='panel'))
        for panel in panels:
            year = panel.find('h3').text
            print(year)
            items = panel.find('ul').find_all('li')
            if items:
                for item in items:
                    for block in item.find_all('div'):
                        subject = block.find('h3').text
                        print(' ' + subject)
                        clist.append(
                            course(count, course_name, link, year, subject))
            else:
                subject = "NULL"
                print(' ' + subject)
                clist.append(course(count, course_name, link, year, subject))
            print('\n')
    except Exception:
        # ``Exception`` instead of bare excepts throughout, so Ctrl-C is not
        # mistaken for "layout not found".
        try:
            unit = (body.find('article')
                    .find_all('section', class_='course-section')[1]
                    .find('div', class_='unit golden-large'))
            year = unit.find('div', class_='accordion-header teal').text
            for item in unit.find('ul').find_all('li'):
                subject = item.find('p').find('strong').text
                print(' ' + subject)
                clist.append(course(count, course_name, link, year, subject))
        except Exception:
            try:
                unit = (body.find('article')
                        .find('section', class_='course-section')
                        .find('div', class_='unit golden-large'))
                modules = unit.find('ul').find_all('p', class_='module')
                if modules:
                    print("About")
                    for module in modules:
                        subject = module.text
                        print(' ' + subject)
                        clist.append(
                            course(count, course_name, link, "About", subject))
                else:
                    print("Research")
                    unit = (body.find('article')
                            .find('section', class_='course-section study')
                            .find('div', class_='unit golden-large'))
                    for item in unit.find('ul').find_all('li'):
                        subject = item.text
                        print(' ' + subject)
                        clist.append(
                            course(count, course_name, link, "Research",
                                   subject))
            except Exception:
                year = "Null"
                subject = "Null"
                print(year)
                print(subject)
                clist.append(course(count, course_name, link, year, subject))
def main_courses(count, in_course):
    """Scrape every course page in ``in_course`` and record its modules.

    Each URL is fetched with ``javascript_link`` and increments the global
    counter ``c``.  The course name is the ``<h1>`` of the page intro.  Two
    page layouts are tried:

    * tabbed layout (``container course``): the ``<li>`` labels are the
      years, and the ``<div>`` at the same position inside ``tab-content``
      holds the modules as list items (``<strong>`` text when present,
      whole item text otherwise) or, failing that, as a single paragraph;
    * plain layout: list items of the ``col-sm-8`` column, recorded under
      "Year 1".

    Entries are appended to ``clist``.

    Args:
        count: Stored as the course number by the tabbed layout only; the
            plain layout and the failure record store the global ``c``.
        in_course: Iterable of course page URLs.
    """
    global c
    for i in in_course:
        c += 1
        subject = ""
        page = javascript_link(i)
        body = page.find('body')
        try:
            course_name = body.find('section', role='main').find(
                'div', class_='page-intro__wrapper').find(
                    'div', class_='page-intro__text').find('h1').text
            try:
                sec = body.find('section', role='main').find(
                    'div', class_='container course').find_all(
                        'div', class_='row')[1].find('div', class_='col-lg-8')
                ul = sec.find('ul').find_all('li')
                print(c.__str__() + " " + course_name + " " + i)
                for j in range(0, len(ul)):
                    try:
                        year = ul[j].text
                        print(year)
                        # The j-th <div> of the tab content is paired with
                        # the j-th tab label.
                        div = sec.find('div', class_='tab-content').find_all(
                            'div')[j].find('ul').find_all('li')
                        for k in range(0, len(div)):
                            try:
                                subject = div[k].find('strong').text
                                clist.append(
                                    course(count, course_name, i, year, subject))
                                print(" " + subject)
                            except:
                                # No <strong> in the item: use its full text.
                                subject = div[k].text
                                clist.append(
                                    course(count, course_name, i, year, subject))
                                #print(" " + subject)
                    except:
                        # No module list in this tab: record its paragraph.
                        div = sec.find(
                            'div', class_='tab-content').find_all('div')[j].find('p')
                        subject = div.text
                        clist.append(
                            course(count, course_name, i, year, subject))
                        print(" " + subject)
                    print('\n')
                print('\n')
            except:
                # Tabbed layout not usable: fall back to the plain list.
                sec = body.find('section', role='main').find(
                    'section', class_=''
                ).find(
                    'div', class_='container padding-top--half padding-bottom--half'
                ).find_all('div', class_='row')[1].find(
                    'div', class_='col-sm-8').find('ul').find_all('li')
                for x in sec:
                    try:
                        subject = x.find('strong').text
                        year = "Year 1"
                        print(year)
                        print(" " + subject)
                        clist.append(course(c, course_name, i, year, subject))
                    except:
                        # NOTE(review): ``year`` is only assigned in the try
                        # branch above or by the tabbed layout - confirm it
                        # is always bound when this line is reached.
                        subject = x.find('p').text
                        print(subject)
                        clist.append(course(c, course_name, i, year, subject))
                        break
                print('\n')
        except:
            # NOTE(review): if the <h1> lookup itself failed, ``course_name``
            # is the previous page's name (or unbound for the first URL) -
            # confirm which name this record should carry.
            clist.append(course(c, course_name, i, "NUll", "Null"))
def inside_link(count, link):
    """Scrape a course page that may offer several study options.

    The page is fetched with ``javascript_link``.

    * Multi-option layout: every ``<li>`` of ``study-options-navigation``
      names one option, and the ``<section>`` at the same position supplies
      its duration and course code (2nd and 4th ``<dd>``).
    * Single-option layout (used when the above raises): the name comes
      from the page heading and the duration/code from the ``info`` block.

    In both layouts the year blocks are the ``<div>`` elements of the
    ``tabs`` container in ``#structure``: the ``<h3 class="year">`` is the
    year and the list items are the modules.  Entries are appended to
    ``clist``.

    Args:
        count: Course number, stored unchanged in every entry.
        link: URL of the course page.
    """
    page_code = javascript_link(link)
    body = page_code.find('body').find(
        'div', class_='slat__container container--xl py-4 centre study-info')
    try:
        div = body.find('div', class_='study-options-navigation')
        total_course = div.find('ul').find_all('li')
        key_info = body.find_all('section')
        for i in range(len(total_course)):
            course_name = total_course[i].find('a').text
            div_tag = key_info[i].find('div', class_='option-info').find(
                'div', class_='info').find('dl')
            duration = div_tag.find_all('dd')[1].text
            course_code = div_tag.find_all('dd')[3].text
            years_detail = page_code.find('body').find(
                'section', id='structure').find(
                    'div', class_='centre slat__container container--xl py-4'
                ).find(
                    'div', class_='column-item container container--md pt-2 clearfix'
                ).find('div', class_='tabs')
            years_detailes = years_detail.find_all('div')
            for jj in years_detailes:
                year = jj.find('h3', class_='year').text
                try:
                    ul = jj.find('ul').find_all('li')
                    print(course_code + " " + course_name + " " + duration + " ")
                    for kk in ul:
                        subject = kk.text
                        clist.append(
                            course(count, course_name, link, year, subject))
                        #print(" " + year + " " +subject)
                except:
                    # NOTE(review): ``years_detailes`` is the find_all(...)
                    # result here, not a single element - confirm that
                    # calling ``.find`` on it is intended; if it raises,
                    # control passes to the outer ``except`` below.
                    year = years_detailes.find('h3').text
                    subject = years_detail.find('h4').text
                    clist.append(
                        course(count, course_name, link, year, subject))
                    #print(" " + year + " " + subject)
    except:
        # Single-option layout: name, duration and code come from the page
        # header and the key-info block.
        course_n = page_code.find('body').find(
            'section', class_='course slat mt-4').find(
                'div', class_='centre page-heading slat__container container--xl'
            ).find('div', class_='grid').find(
                'div', class_='course-title grid__item grid__item--1/2')
        course_name = course_n.find('h1', class_='th-s2 th-o text-blue').text
        key_info = page_code.find('body').find(
            'div', class_='slat__container container--xl py-4 centre study-info')
        div_tag = key_info.find('div', class_='info').find('dl')
        duration = div_tag.find_all('dd')[1].text
        course_code = div_tag.find_all('dd')[3].text
        years_detail = page_code.find('body').find(
            'section', id='structure'
        ).find('div',
               class_='centre slat__container container--xl py-4').find(
                   'div',
                   class_='column-item container container--md pt-2 clearfix').find(
                       'div', class_='tabs')
        years_detailes = years_detail.find_all('div')
        for jj in years_detailes:
            year = jj.find('h3', class_='year').text
            try:
                ul = jj.find('ul').find_all('li')
                #print(course_code + " " + course_name + " " + duration + " ")
                for kk in ul:
                    subject = kk.text
                    clist.append(
                        course(count, course_name, link, year, subject))
                    #print(" " + year + " " + subject)
            except:
                # No list in this block: use the first <div>'s year with the
                # container's <h4>, or else every paragraph of the container.
                # Rebinding ``years_detailes`` does not affect the running
                # loop, which keeps iterating the original list.
                years_detailes = years_detail.find('div')
                try:
                    year = years_detailes.find('h3', class_='year').text
                    subject = years_detail.find('h4').text
                    clist.append(
                        course(count, course_name, link, year, subject))
                    #print(" " + year + " " + subject)
                except:
                    para = years_detail.find_all('p')
                    for l in para:
                        subject = l.text
                        clist.append(
                            course(count, course_name, link, year, subject))