예제 #1
0
파일: pages.py 프로젝트: Konubinix/weboob
    def get_subtitle(self):
        """Build a ``Subtitle`` from the currently parsed subtitle page.

        The id and extension are taken from the ``a#bt-dwl`` download link;
        the title, CD count and language are parsed out of the ``title``
        attribute of the ``link[rel=bookmark]`` element.

        Returns:
            A ``Subtitle`` with ``url``, ``ext``, ``language``, ``nb_cd`` and
            ``description`` filled in. When the page lists no file name, the
            description stays ``NotAvailable`` and the name is the bare title
            (previously this case crashed on an unbound local).

        Raises:
            IndexError, ValueError: if the bookmark title does not contain a
                CD count before "cd" or a parenthesised language.
        """
        root = self.document.getroot()

        download_link = self.parser.select(root, 'a#bt-dwl', 1)
        # the subtitle id is the last path component of the "rel" attribute
        sub_id = download_link.attrib.get('rel', '').split('/')[-1]
        # the link text reads "Download (<ext>)"; fall back to zip otherwise
        m = re.match(r'Download \((\w+)\)',
                     self.parser.tocleanstring(download_link))
        ext = m.group(1) if m else u'zip'
        url = unicode('http://www.opensubtitles.org/subtitleserve/sub/%s'
                      % sub_id)

        bookmark = self.parser.select(root, 'link[rel=bookmark]', 1)
        title = unicode(bookmark.attrib.get('title', ''))
        # the token right before the first "cd" of the title is the CD count
        nb_cd = int(title.lower().split('cd')[0].split()[-1])
        # the language is the text inside the first pair of parentheses
        lang = unicode(title.split('(')[1].split(')')[0])

        # defaults used when the page does not list any file
        desc = NotAvailable
        name = title
        file_icons = self.parser.select(root, "img[title~=filename]")
        if file_icons:
            # one whitespace-normalised line per listed file
            file_lines = [' '.join(icon.getparent().text_content().split())
                          for icon in file_icons]
            desc = u'\n'.join([u'files :'] + file_lines)
            name = unicode('%s (%s)' % (title, file_lines[0]))

        # translate the long language name to its short code when known
        for lshort, llong in LANGUAGE_CONV.items():
            if lang == llong:
                lang = unicode(lshort)
                break

        subtitle = Subtitle(sub_id, name)
        subtitle.url = url
        subtitle.ext = ext
        subtitle.language = lang
        subtitle.nb_cd = nb_cd
        subtitle.description = desc
        return subtitle
예제 #2
0
파일: pages.py 프로젝트: lissyx/weboob
    def get_subtitle(self):
        """Assemble a ``Subtitle`` object out of the current subtitle page.

        The name, CD count and file list are each read from the third ``td``
        of their table row. The id comes from the last component of the page
        URL and the language from the tail of the generated download URL.

        Returns:
            The populated ``Subtitle``.
        """
        root = self.document.getroot()

        # the row holding the "filename" icon sits two levels above the <img>
        name_icon = self.parser.select(root, 'img[alt=filename]', 1)
        name_row = name_icon.getparent().getparent()
        name = unicode(self.parser.select(name_row, 'td')[2].text)

        # "subtitle-<id>.html" -> "<id>"
        page_name = self.browser.geturl().split('/')[-1]
        sub_id = page_name.replace('.html', '').replace('subtitle-', '')
        url = unicode('http://%s/download-%s.html'
                      % (self.browser.DOMAIN, sub_id))

        nb_cd_row = self.parser.select(root, 'tr[title~=amount]', 1)
        nb_cd = int(self.parser.select(nb_cd_row, 'td')[2].text)

        # whatever follows the last dash of the download URL, minus ".html"
        lang = unicode(url.split('-')[-1].split('.html')[0])

        list_row = self.parser.select(root, 'tr[title~=list]', 1)
        listing = self.parser.select(list_row, 'td')[2].text_content().strip()
        # put every ".srt" entry on its own line
        desc = u"files :\n" + listing.replace('.srt', '.srt\n')

        # split "<name>.<ext>"; without an extension fall back to 'zip'
        ext = 'zip'
        m = re.match('(.*?)\.(\w+)$', name)
        if m is not None:
            name, ext = m.groups()

        subtitle = Subtitle(sub_id, name)
        subtitle.url = url
        subtitle.ext = ext
        subtitle.language = lang
        subtitle.nb_cd = nb_cd
        subtitle.description = desc
        return subtitle
예제 #3
0
    def get_subtitle(self, id):
        """Return the ``Subtitle`` whose download link matches ``id``.

        Args:
            id: string of the form ``"<something>|<href>"``; only the part
                after the first ``|`` is used, as the link address to look up.

        Returns:
            A ``Subtitle`` named ``"<original title> (<translated title>)"``
            with language ``fr`` and no description.
        """
        href = id.split('|')[1]

        # we have to find the 'tr' which contains the link to this address:
        # it is reached by climbing five levels up from the <a> element
        anchor = self.parser.select(self.document.getroot(),
                                    'a[href="%s"]' % href, 1)
        row = anchor
        for _ in range(5):
            row = row.getparent()
        cells = self.parser.select(row, 'td')

        def cell_text(index):
            # text of the single <font> element inside the given cell
            return self.parser.select(cells[index], 'font', 1).text

        translated = cell_text(0).lower()
        original = cell_text(1).lower()
        # the first token of the third cell is the number of CDs
        nb_cd = int(cell_text(2).strip().split()[0])

        # splitting and re-joining trashes special spacing chars
        translated = " ".join(translated.split())
        original = " ".join(original.split())

        url = unicode('http://davidbillemont3.free.fr/%s' % href)
        subtitle = Subtitle(id, unicode('%s (%s)' % (original, translated)))
        subtitle.url = url
        subtitle.ext = url.split('.')[-1]
        subtitle.language = unicode('fr')
        subtitle.nb_cd = nb_cd
        subtitle.description = NotAvailable
        return subtitle
예제 #4
0
파일: pages.py 프로젝트: Boussadia/weboob
    def get_subtitle(self):
        """Parse the subtitle page into a ``Subtitle``.

        Reads the file name, CD amount and file list from the third cell of
        their respective table rows, derives the id from the browser URL and
        builds the download URL from the browser's ``DOMAIN``.

        Returns:
            The ``Subtitle`` with url, ext, language, nb_cd and description
            set.
        """
        select = self.parser.select
        root = self.document.getroot()

        def third_cell(row):
            # the value of each information row sits in its third <td>
            return select(row, 'td')[2]

        # grandparent of the "filename" icon is the row carrying the name
        name_row = select(root, 'img[alt=filename]', 1).getparent().getparent()
        name = unicode(third_cell(name_row).text)

        last_part = self.browser.geturl().split('/')[-1]
        ident = last_part.replace('.html', '').replace('subtitle-', '')
        url = unicode('http://%s/download-%s.html' % (self.browser.DOMAIN, ident))

        nb_cd = int(third_cell(select(root, 'tr[title~=amount]', 1)).text)

        # the language is the last dash-separated chunk of the download URL
        lang = unicode(url.split('-')[-1].split('.html')[0])

        files_cell = third_cell(select(root, 'tr[title~=list]', 1))
        files_text = files_cell.text_content().strip().replace('.srt', '.srt\n')
        desc = u"files :\n"
        desc += files_text

        # separate the extension from the name; default to 'zip' if none
        m = re.match('(.*?)\.(\w+)$', name)
        if m:
            name, ext = m.group(1), m.group(2)
        else:
            ext = 'zip'

        subtitle = Subtitle(ident, name)
        for attr, value in (('url', url),
                            ('ext', ext),
                            ('language', lang),
                            ('nb_cd', nb_cd),
                            ('description', desc)):
            setattr(subtitle, attr, value)
        return subtitle
예제 #5
0
파일: pages.py 프로젝트: antibios/weboob
    def get_subtitle(self):
        """Create a ``Subtitle`` from the parsed document ``self.doc``.

        The file name, CD amount and file list are read from the third
        ``td`` of their rows; the id is extracted from ``self.url`` and the
        download URL is built on ``self.browser.BASEURL``.

        Returns:
            The populated ``Subtitle``.

        Raises:
            ValueError: if the page does not contain exactly one "amount" row
                or exactly one "list" row (single-element unpacking).
        """
        # the row with the name is the grandparent of the "filename" icon
        name_icon = self.doc.xpath('//img[@alt="filename"]')[0]
        name_row = name_icon.getparent().getparent()
        name = to_unicode(name_row.xpath('.//td')[2].text)

        # "subtitle-<id>.html" -> "<id>"
        page_name = self.url.split('/')[-1]
        sub_id = page_name.replace('.html', '').replace('subtitle-', '')
        url = '%s/download-%s.html' % (self.browser.BASEURL, sub_id)

        [amount_row] = self.doc.xpath('//tr[contains(@title, "amount")]')
        nb_cd = int(amount_row.xpath('.//td')[2].text)

        # last dash-separated chunk of the download URL, without ".html"
        lang = url.split('-')[-1].split('.html')[0]

        [list_row] = self.doc.xpath('//tr[contains(@title,"list")]')
        listing = list_row.xpath('.//td')[2].text_content().strip()
        desc = u"files :\n" + listing.replace('.srt', '.srt\n')

        # split off the extension; default to 'zip' when the name has none
        ext = 'zip'
        m = re.match('(.*?)\.(\w+)$', name)
        if m is not None:
            name, ext = m.groups()

        subtitle = Subtitle(sub_id, name)
        subtitle.url = url
        subtitle.ext = ext
        subtitle.language = lang
        subtitle.nb_cd = nb_cd
        subtitle.description = desc
        return subtitle
예제 #6
0
파일: pages.py 프로젝트: juliaL03/weboob
    def get_subtitle(self, id):
        """Look up the table row linking to ``id`` and build its ``Subtitle``.

        Args:
            id: ``"<prefix>|<href>"`` string; the second ``|``-separated
                field is the link address searched for in the document.

        Returns:
            A French-language ``Subtitle`` whose name combines the original
            and translated titles and whose description is ``NotAvailable``.
        """
        select = self.parser.select
        href = id.split('|')[1]

        # we have to find the 'tr' which contains the link to this address
        link = select(self.document.getroot(), 'a[href="%s"]' % href, 1)
        great_grandparent = link.getparent().getparent().getparent()
        row = great_grandparent.getparent().getparent()
        cells = select(row, 'td')

        translated_raw = select(cells[0], 'font', 1).text.lower()
        original_raw = select(cells[1], 'font', 1).text.lower()
        nb_cd_text = select(cells[2], 'font', 1).text.strip()
        # only the leading token of the cell is the CD count
        nb_cd = int(nb_cd_text.split()[0])

        # this is to trash special spacing chars
        translated_title = " ".join(translated_raw.split())
        original_title = " ".join(original_raw.split())

        name = unicode('%s (%s)' % (original_title, translated_title))
        url = unicode('http://davidbillemont3.free.fr/%s' % href)

        subtitle = Subtitle(id, name)
        for attr, value in (('url', url),
                            ('ext', url.split('.')[-1]),
                            ('language', unicode('fr')),
                            ('nb_cd', nb_cd),
                            ('description', NotAvailable)):
            setattr(subtitle, attr, value)
        return subtitle
예제 #7
0
    def iter_subtitles(self, language, pattern):
        """Yield a ``Subtitle`` for each table row matching ``pattern``.

        Args:
            language: not used by this method.
            pattern: search words; ``+`` is treated as a space and matching
                is done on lowercased words.

        Yields:
            One French-language ``Subtitle`` per matching row. Nothing is
            yielded when no result table is found or when the table is not
            100% wide.
        """
        select = self.parser.select
        root = self.document.getroot()

        pattern = pattern.strip().replace('+', ' ').lower()
        pattern_words = pattern.split()
        wanted = set(pattern_words)
        single_word = len(pattern_words) == 1

        tables = select(root, 'table[bordercolor="#B8C0B2"]')
        if len(tables) == 0:
            # fall back to the alternative attribute name
            tables = select(root, 'table[bordercolordark="#B8C0B2"]')
            if len(tables) == 0:
                return
        # some results of freefind point on useless pages
        if tables[0].attrib.get('width', '') != '100%':
            return

        for row in tables[0].getiterator('tr'):
            cells = select(row, 'td')
            translated_words = select(cells[0], 'font', 1).text.lower().split()
            original_words = select(cells[1], 'font', 1).text.lower().split()

            # a one-word pattern must be one of the title words; otherwise
            # the pattern and a title must share at least 2 words
            if single_word:
                matched = (pattern in translated_words or
                           pattern in original_words)
            else:
                matched = (len(wanted & set(translated_words)) > 1 or
                           len(wanted & set(original_words)) > 1)
            if not matched:
                continue

            # re-joining the words trashes special spacing chars
            translated_title = " ".join(translated_words)
            original_title = " ".join(original_words)

            nb_cd_text = select(cells[2], 'font', 1).text.strip()
            # drop the surrounding ' ', 'C' and 'D' characters
            nb_cd = int(nb_cd_text.strip(' CD'))
            name = unicode('%s (%s)' % (original_title, translated_title))
            href = select(cells[3], 'a', 1).attrib.get('href', '')
            url = unicode('http://davidbillemont3.free.fr/%s' % href)
            # id is "<last component of the current URL>|<href>"
            page = self.browser.geturl().split('/')[-1]
            id = unicode('%s|%s' % (page, href))

            subtitle = Subtitle(id, name)
            subtitle.url = url
            subtitle.ext = url.split('.')[-1]
            subtitle.language = unicode('fr')
            subtitle.nb_cd = nb_cd
            subtitle.description = NotAvailable
            yield subtitle
예제 #8
0
파일: pages.py 프로젝트: juliaL03/weboob
    def iter_subtitles(self, language, pattern):
        """Generate the subtitles of the result table that match ``pattern``.

        Args:
            language: ignored here.
            pattern: search string; it is stripped, ``+`` becomes a space and
                everything is lowercased before being split into words.

        Yields:
            ``Subtitle`` objects (language ``fr``, description
            ``NotAvailable``) for rows whose translated or original title
            matches. The generator ends immediately if neither table selector
            matches or if the first table's width is not ``100%``.
        """
        pattern = pattern.strip().replace('+', ' ').lower()
        pattern_words = pattern.split()

        root = self.document.getroot()
        tables = []
        for selector in ('table[bordercolor="#B8C0B2"]',
                         'table[bordercolordark="#B8C0B2"]'):
            tables = self.parser.select(root, selector)
            if len(tables) != 0:
                break
        if len(tables) == 0:
            return
        result_table = tables[0]
        # some results of freefind point on useless pages
        if result_table.attrib.get('width', '') != '100%':
            return

        def is_match(title_words):
            # one-word pattern: it must be one of the title words;
            # otherwise at least 2 words must be shared with the title
            if len(pattern_words) == 1 and pattern in title_words:
                return True
            return len(set(pattern_words) & set(title_words)) > 1

        for line in result_table.getiterator('tr'):
            cols = self.parser.select(line, 'td')
            traduced_words = self.parser.select(cols[0], 'font', 1).text.lower().split()
            original_words = self.parser.select(cols[1], 'font', 1).text.lower().split()

            if not (is_match(traduced_words) or is_match(original_words)):
                continue

            # this is to trash special spacing chars
            traduced_title = " ".join(traduced_words)
            original_title = " ".join(original_words)

            # the cell text has the ' ', 'C', 'D' characters stripped off
            nb_cd = int(self.parser.select(cols[2], 'font', 1).text.strip().strip(' CD'))
            name = unicode('%s (%s)' % (original_title, traduced_title))
            href = self.parser.select(cols[3], 'a', 1).attrib.get('href', '')
            url = unicode('http://davidbillemont3.free.fr/%s' % href)
            current_page = self.browser.geturl().split('/')[-1]
            id = unicode('%s|%s' % (current_page, href))

            subtitle = Subtitle(id, name)
            for attr, value in (('url', url),
                                ('ext', url.split('.')[-1]),
                                ('language', unicode('fr')),
                                ('nb_cd', nb_cd),
                                ('description', NotAvailable)):
                setattr(subtitle, attr, value)
            yield subtitle
예제 #9
0
파일: pages.py 프로젝트: linura/weboob
    def get_subtitle(self):
        """Build a ``Subtitle`` from the currently parsed subtitle page.

        The id comes from the ``data-product-id`` attribute of the
        ``a#bt-dwl-bt`` download link and the extension from that link's
        text; the title, CD count and language are parsed out of the
        ``title`` attribute of the ``link[rel=bookmark]`` element.

        Returns:
            A ``Subtitle`` with ``url``, ``ext``, ``language``, ``nb_cd`` and
            ``description`` filled in. When the page lists no file name, the
            description stays ``NotAvailable`` and the name is the bare title
            (previously this case crashed on an unbound local).

        Raises:
            IndexError, ValueError: if the bookmark title does not contain a
                CD count before "cd" or a parenthesised language.
        """
        root = self.document.getroot()

        download_link = self.parser.select(root, 'a#bt-dwl-bt', 1)
        sub_id = download_link.attrib.get('data-product-id', '')
        # the link text reads "Download (<ext>)"; fall back to zip otherwise
        m = re.match(r'Download \((\w+)\)',
                     self.parser.tocleanstring(download_link))
        ext = m.group(1) if m else u'zip'
        url = unicode('http://www.opensubtitles.org/en/subtitleserve/sub/%s' %
                      sub_id)

        bookmark = self.parser.select(root, 'link[rel=bookmark]', 1)
        title = unicode(bookmark.attrib.get('title', ''))
        # the token right before the first "cd" of the title is the CD count
        nb_cd = int(title.lower().split('cd')[0].split()[-1])
        # the language is the text inside the first pair of parentheses
        lang = unicode(title.split('(')[1].split(')')[0])

        # defaults used when the page does not list any file
        desc = NotAvailable
        name = title
        file_icons = self.parser.select(root, "img[title~=filename]")
        if file_icons:
            # one whitespace-normalised line per listed file
            file_lines = [' '.join(icon.getparent().text_content().split())
                          for icon in file_icons]
            desc = u'\n'.join([u'files :'] + file_lines)
            name = unicode('%s (%s)' % (title, file_lines[0]))

        # translate the long language name to its short code when known
        for lshort, llong in LANGUAGE_CONV.items():
            if lang == llong:
                lang = unicode(lshort)
                break

        subtitle = Subtitle(sub_id, name)
        subtitle.url = url
        subtitle.ext = ext
        subtitle.language = lang
        subtitle.nb_cd = nb_cd
        subtitle.description = desc
        return subtitle