Пример #1
0
 def display_lectures(self, url):
     """Display the lectures listed on a course page.

     Downloads the course page at ``url``, builds one entry per ``<li>``
     found inside the ``results-list`` div, downloads every lecture page to
     resolve its video url and hands the entries that have a video url to
     ``self.add_videos``.

     Args:
         url: Address of the course page whose lectures should be listed.
     """
     html = urlread(url)
     # Only parse the div which contains all of the <li> lecture tags.
     div_tag = BS(html, parseOnlyThese=SS('div', {'class': 'results-list'}))
     # Parse the name, page url, description and thumbnail of each lecture.
     dirs = [{'name': li.h4.a.text,
              'htmlurl': self._urljoin(li.h4.a['href']),
              'info': {'plot': li.p.text, 'title': li.h4.a.text},
              'tn': self._urljoin(
                  li.find('img', {'class': 'thumb-144'})['src'])}
             for li in div_tag('li')]
     # The dialog is stored on self and handed to async_urlread; num_steps
     # is the number of lecture pages that are about to be downloaded.
     self.dp = DialogProgress(self.getString(30000),
                              line1=self.getString(30101),
                              num_steps=len(dirs))
     try:
         # For each entry, download the lecture's html page and parse the
         # video url out of it.
         urls = [d['htmlurl'] for d in dirs]
         responses = async_urlread(urls, self.dp)
         for d, response in zip(dirs, responses):
             d['url'] = self._get_video_url(response)
         self.dp.update(100)
     finally:
         # Close the dialog even when a download or parse step raises, so
         # a failure does not leave the progress box open.
         self.dp.close()
     # Filter out lectures that don't have urls; currently a fix for a chem
     # course which contains a bad link to a lecture.
     dirs = [d for d in dirs if d['url'] is not None]
     self.add_videos(dirs)
Пример #2
0
    def _get_courses_lectures(self, htmls):
        """Return ``(courses, lectures)`` parsed from topic page html.

        Each topic page displays only 12 results to a page, so the
        pagination urls found in the given page(s) are downloaded as well
        and the video results are parsed from all pages together.

        Args:
            htmls: The html source of one topic page (a string) or a list
                of such html sources.

        Returns:
            A tuple of two lists ``(courses, lectures)``.  ``courses`` holds
            the results whose url contains '/courses/', each updated with
            ``'mode': 3``.  ``lectures`` holds the results whose url
            contains '/lectures/', with the url replaced by the value of
            ``self._get_video_url`` and the name prefixed with string
            30103; lectures whose video url is None are dropped.
        """
        # A single page may be passed as a bare string; wrap it so the rest
        # of the method always iterates over a list of pages.  type(u'')
        # additionally matches unicode strings on Python 2.
        if isinstance(htmls, (str, type(u''))):
            htmls = [htmls]
        # Collect the urls of the extra result pages from every topic page.
        pagination_urls = [url for html in htmls
                           for url in self._get_pagination_urls(html)]
        # Download every pagination page.  If a dialog progress box exists,
        # update the step for each increment.  Allocate 50% of the bar for
        # downloading the pagination urls.  The other 50% is allocated to
        # downloading all of the topic pages when choosing 'View All' for a
        # subject.
        if self.dp and pagination_urls:
            self.dp.step = int(50 / len(pagination_urls))
            page_htmls = async_urlread(pagination_urls, self.dp)
        else:
            page_htmls = async_urlread(pagination_urls)

        # Extend the list of pagination htmls with the given htmls.
        page_htmls.extend(htmls)
        # Get a complete list of video results by parsing all pages.
        results = self._get_video_results(page_htmls)
        # Split into courses and lectures so they can be displayed in
        # groups.  Real lists are built (rather than filter objects) so the
        # update passes below do not exhaust what is returned.
        courses = [r for r in results if '/courses/' in r['url']]
        lectures = [r for r in results if '/lectures/' in r['url']]
        # Add the mode argument to courses; lectures don't need it since
        # they will contain a direct url to the video.
        for course in courses:
            course['mode'] = 3
        # Get the actual video url for each lecture; this ensures that the
        # display link plays a video and doesn't go to another level of
        # directory listings.
        # NOTE(review): display_lectures passes downloaded html to
        # _get_video_url, while the result's 'url' value is passed here --
        # confirm which input the helper expects.
        for lecture in lectures:
            lecture.update({
                'url': self._get_video_url(lecture['url']),
                'name': self.getString(30103) + lecture['name'],
            })
        # Filter out lectures with no video url.  This is a result of bad
        # regex parsing, crappy fix...
        lectures = [lec for lec in lectures if lec['url'] is not None]
        return courses, lectures
Пример #3
0
 def display_allresults(self, url):
     """Display all course and lecture results for a subject page.

     Used on a subject page to list all video results without having to
     drill down into each category.  The topic links are read from the
     ``results-side`` div of the page at ``url``, every topic page is
     downloaded, and the parsed courses and lectures are sorted by name
     and added to the listing.

     Args:
         url: Address of the subject page.
     """
     html = urlread(url)
     # Only parse the div which contains all of the topic <a> tags.
     div_topics = BS(html,
                     parseOnlyThese=SS('div', {'class': 'results-side'}))
     # Create a list of urls for all topics, skipping links whose text
     # starts with 'Online' or 'All' or contains 'Credit'.
     topic_urls = [self._urljoin(a['href']) for a in div_topics('a')
                   if not a.text.startswith(('Online', 'All'))
                   and 'Credit' not in a.text]
     # num_steps is twice the number of topic pages; the same dialog is
     # also used by _get_courses_lectures through self.dp.
     self.dp = DialogProgress(self.getString(30000),
                              line1=self.getString(30102),
                              num_steps=(2 * len(topic_urls)))
     try:
         topic_htmls = async_urlread(topic_urls, self.dp)
         courses, lectures = self._get_courses_lectures(topic_htmls)
         self.dp.update(100)
     finally:
         # Close the dialog even when a download or parse step raises, so
         # a failure does not leave the progress box open.
         self.dp.close()
     courses = sorted(courses, key=lambda c: c['name'])
     lectures = sorted(lectures, key=lambda lec: lec['name'])
     # Courses are added first with end=False, then the lectures.
     self.add_dirs(courses, end=False)
     self.add_videos(lectures)