def run(self):
    """List the courses found on the page at ``self.args["url"]``.

    Only the ``<table summary="courses">`` element is parsed. One directory
    entry is built per course cell and the whole list is passed to
    ``self.app.add_dirs``.
    """
    page_source = urlread(self.args["url"])
    courses_table = BS(
        page_source,
        parseOnlyThese=SS("table", {"summary": "courses"}),
    )

    # Some pages are missing <tr> tags, so rows cannot be walked directly.
    # Thumbnails and course cells are collected independently and paired
    # up by position instead.
    thumbnails = courses_table.findAll("img")
    course_cells = courses_table.findAll(
        "td", {"class": "departmentRightColumn"}
    )

    entries = []
    for cell, thumbnail in zip(course_cells, thumbnails):
        title = cell.strong.string
        link = self.urljoin(cell.a["href"])
        details = {
            "title": title,
            # first child of the last paragraph in the cell
            "plot": cell.findAll("p")[-1].contents[0],
            # third child node of the course link
            "credits": cell.a.contents[2],
        }
        entries.append({
            "name": title,
            "mode": MODE_YALE_LECTURES,
            "url": link,
            "info": details,
            "tn": self.urljoin(thumbnail["src"]),
        })

    self.app.add_dirs(entries)
def run(self):
    """List the entries linked from ``self.base_url``.

    Only ``<dd class="portletItem">`` elements are parsed. Each one yields
    a directory entry named after its first link's ``title`` attribute and
    pointing at that link's ``href``. The list goes to
    ``self.app.add_dirs``.
    """
    portlet_items = BS(
        urlread(self.base_url),
        parseOnlyThese=SS("dd", {"class": "portletItem"}),
    )

    entries = []
    for portlet_item in portlet_items:
        link = portlet_item.a
        entries.append({
            "name": link["title"],
            "url": link["href"],
            "mode": MODE_YALE_COURSES,
        })

    self.app.add_dirs(entries)
def run(self):
    """List the topic links found on the page at ``self.browse_url``.

    Each link becomes a directory entry whose ``topic`` is the last path
    segment of its ``href``. The list goes to ``self.app.add_dirs``.
    """
    page = BS(urlread(self.browse_url))
    link_lists = page.findAll('ul', {'class': 'links'})

    # The links are read from the third <ul class="links"> on the page.
    topic_anchors = link_lists[2].findAll('a')

    entries = []
    for anchor in topic_anchors:
        label = anchor.string
        href = anchor['href']
        entries.append({
            'name': label,
            'url': self.urljoin(href),
            'topic': href.rsplit('/', 1)[1],
            'mode': MODE_MITWORLD_VIDEOS,
        })

    self.app.add_dirs(entries)
def run(self):
    """Play the video embedded in the page at ``self.args['url']``.

    The ``src`` of the page's first ``<embed>`` tag is handed to
    ``parse_url_qs``; the resulting ``host`` and ``flv`` values are used to
    build an RTMP URL and play path, which are passed to
    ``self.app.play_video`` together with ``self.args['info']``.
    """
    page = BS(urlread(self.args['url']))
    embed_src = page.find('embed')['src']
    # presumably a mapping of the query-string parameters -- confirm
    # against parse_url_qs
    query = parse_url_qs(embed_src)

    server = query['host']
    play_path = 'ampsflash/%s' % query['flv']
    rtmp_url = 'rtmp://%s/ondemand?_fcs_vhost=%s' % (server, server)

    self.app.play_video(
        rtmp_url,
        info=self.args['info'],
        properties={'PlayPath': play_path, 'TcUrl': rtmp_url},
    )
def run(self):
    """List the lectures of the course at ``self.args["url"]``.

    The course page is fetched first to find its "Downloads" link. That
    page's ``downloadsTable`` is then parsed, and every row containing a
    link becomes a resolvable entry passed to
    ``self.app.add_resolvable_dirs``.
    """
    # Downloads pages do not follow a predictable URL pattern, so the link
    # has to be read from the course page itself.
    course_page = BS(urlread(self.args["url"]))
    downloads_url = course_page.find("a", {"title": "Downloads"})["href"]

    downloads_table = BS(
        urlread(downloads_url),
        parseOnlyThese=SS("table", {"id": "downloadsTable"}),
    )

    entries = []
    for row in downloads_table.findAll("tr"):
        # Rows without a link (e.g. header and footer rows) are not lectures.
        if not row.a:
            continue
        # The name is the text of the row's second cell.
        name_cell = row.findAll("td")[1]
        entries.append({
            "name": "".join(name_cell.findAll(text=True)).strip(),
            "mode": MODE_YALE_PLAYVIDEO,
            "url": row.a["href"],
            "referer": downloads_url,
        })

    self.app.add_resolvable_dirs(entries)
def run(self):
    """List the videos on one page of the topic ``self.args['topic']``.

    The page number comes from ``self.args['page']`` and defaults to 1.
    Every ``<li class="video">`` becomes a resolvable entry. Any items
    returned by ``self.get_pagination_items`` are added first, with
    ``end=False``, before the videos themselves.
    """
    page = self.args.get('page', 1)
    topic = self.args['topic']
    listing = BS(urlread(self.pageurl(topic, page)))

    videos = []
    for entry in listing.findAll('li', {'class': 'video'}):
        # The heading link may contain nested markup; join all of its text.
        title = ''.join(entry.h4.a.findAll(text=True)).encode('utf-8')
        link = self.urljoin(entry.a['href'])
        thumbnail = self.urljoin(entry.img['src'])
        date_tag = entry.find('p', {'class': 'date'})
        speaker_tag = entry.find('p', {'class': 'speaker'})
        videos.append({
            'name': title,
            'url': link,
            'mode': MODE_MITWORLD_PLAYVIDEO,
            'tn': thumbnail,
            'info': {
                'title': title,
                'aired': ''.join(date_tag.findAll(text=True)),
                'credits': unicode(speaker_tag.string),
            },
        })

    pagination_items = self.get_pagination_items(listing, topic, page)
    if len(pagination_items) > 0:
        self.app.add_dirs(pagination_items, end=False)

    self.app.add_resolvable_dirs(videos)