Example #1
    def get_episodes(self, channel):
        # Requires: urllib, re, lxml.html, the project's Serie, Episode, Genre
        # and HttpMediaFile models, and the get_youtube_url helper.
        url = 'http://www.youtube.com/user/CanalACulturaActiva/feed'
        html = urllib.urlopen(url).read()
        dom = lxml.html.document_fromstring(html)
        for item in dom.cssselect('.feed-item-main'):
            # Feed item titles have the form "<show title> - <episode title>".
            p = [x.strip() for x in item.cssselect('h4')[0].text_content().split('-')]
            show_title = p[0]
            episode_title = '-'.join(p[1:])
            try:
                serie = Serie.objects.get(name=show_title)
            except Serie.DoesNotExist:
                serie = Serie(name=show_title)
                print ">> SERIE: %s" % show_title.encode('utf8')
                serie.channel = channel
                serie.save()
                serie.genres.add(Genre.objects.get_or_create(code='CULTURA', defaults={'name': 'Cultura'})[0])

            # Skip episodes that were already imported.
            if Episode.objects.filter(name=episode_title).count() > 0:
                continue
            episode = Episode(serie=serie, name=episode_title, number=0)
            print "%s" % episode_title
            episode.description = item.cssselect('.description')[0].text_content() + "\n" + \
                item.cssselect('.video-time')[0].text_content()
            episode.thumbnail = urllib.basejoin(self.BASE_URL, item.cssselect('.video-thumb img')[0].get('src'))
            episode.save()
            # Resolve the YouTube video id from the item link into a playable stream URL.
            url2 = item.cssselect('a')[0].get('href')
            video_id = re.findall('v=([^&]+)', url2)[0]
            video_url = get_youtube_url(video_id)
            media = HttpMediaFile(width=640, height=480, mimetype='video/mp4', url=video_url)
            media.episode = episode
            media.save()
            serie.episode_set.add(episode)
Example #2
    def scrap_episode(self, serie, url, thumbnail):
        # Requires: urllib, re, BeautifulSoup (bs4), datetime.time and the
        # project's Episode and MundoFoxMediaFile models.
        if Episode.objects.filter(url=url).count() > 0:
            print "EXISTS"
            return

        print url
        html = urllib.urlopen(url).read()
        soup = BeautifulSoup(html, from_encoding='utf-8')

        # Season, episode number and duration are embedded in the description,
        # e.g. "Temporada 1 | Ep. 12 (42:30)".
        p = soup.find('div', 'description').text
        season, number, duration = re.findall(r'Temporada ([0-9]+) \| Ep\. ([0-9]+) \(([0-9:]+)\)', p)[0]
        if Episode.objects.filter(serie=serie, season=int(season), number=int(number)).count() > 0:
            print "EXISTS"
            return
        episode = Episode()
        episode.serie = serie
        episode.url = url  # stored so the duplicate check above matches on later runs
        episode.season = int(season)
        episode.number = int(number)
        episode.duration = time(0, *map(int, duration.split(':')))
        episode.description = soup.find('div', 'description').p.children.next()
        episode.thumbnail = thumbnail
        episode.save()

        # The SMIL playlist URL is exposed by the embedded player configuration.
        media = MundoFoxMediaFile()
        smil_url = re.findall('player.releaseUrl = "([^"]+)"', html)[0]
        smil_url += "&manifest=m3u&format=SMIL&Tracking=true&Embedd=true"
        media._url = smil_url
        media.episode = episode
        media.save()
Example #3
    def scrap_serie(self, serie):
        # Requires: urllib, re, BeautifulSoup (bs4) and the project's Episode model.
        html = urllib.urlopen(serie.url).read()
        # Strip XML/PHP-style processing instructions that confuse the parser.
        html = re.sub(r'<\?.*?\?>', '', html)
        soup = BeautifulSoup(html, from_encoding='utf8')
        for cnt, article in enumerate(soup('td', 'listado-mediateca-menu')):
            episode = Episode()
            episode.serie = serie
            episode.thumbnail = urllib.basejoin(self.base_url, article.find('div', 'imagen-mediateca').img.get('src'))
            episode.name = article.find('h5', 'titulo-mediateca').a.text
            print episode.name.encode('utf8')
            episode.description = article.find('span', 'texto-mediateca').text
            episode_url = urllib.basejoin(self.base_url, article.find('h5', 'titulo-mediateca').a.get('href'))
            episode.number = cnt + 1
            episode.season = 1
            episode.save()
            # Fetch the per-episode page to extract the media file details.
            self.scrap_episode(episode, episode_url)