示例#1
0
    def parse_results(self, raw_xml):
        """Parse a search-result XML document into a list of ``Movie`` objects.

        The first ``<opensubtitles>`` element that contains ``<results>`` is
        used, and within it the first ``<results>`` element that contains
        ``<subtitle>`` children. Each ``<subtitle>`` is flattened into a
        dictionary shaped like::

            {'IDSubtitle': {'IDSubtitle': 'foo', 'Link': 'foo', ...},
             'LinkUseNext': 'foo',
             'MovieName': 'foo_movie',
             ...}

        That dictionary is passed to ``Movie``. Subtitles whose movie has the
        same ``MovieId`` as one already collected are attached to the existing
        ``Movie`` instead of creating a new one.

        Returns:
            ``[]`` when the document holds no ``<subtitle>`` elements,
            ``None`` as soon as a subtitle carrying an ``<EpisodeName>``
            element is met, otherwise the list of ``Movie`` objects.
        """

        def first_element(node, tag):
            # First descendant element called ``tag``, or None when absent.
            matches = node.getElementsByTagName(tag)
            return matches[0] if matches else None

        def copy_text(node, tag, target):
            # Store the text of the first ``tag`` element under target[tag].
            # Nothing is stored (and False is returned) when the element is
            # missing or has no child node.
            element = first_element(node, tag)
            if element is None or not element.firstChild:
                return False
            target[tag] = element.firstChild.data
            return True

        def describe(element, tag, attribute_names):
            # {tag: <text of element>, <attribute>: <attribute value>, ...}
            details = {tag: element.firstChild.data}
            for attribute_name in attribute_names:
                details[attribute_name] = element.getAttribute(attribute_name)
            return details

        dom = minidom.parseString(raw_xml)

        # Locate the list of <subtitle> nodes to convert.
        subtitle_nodes = None
        for root in dom.getElementsByTagName('opensubtitles'):
            result_nodes = root.getElementsByTagName('results')
            if not result_nodes:
                continue
            for result_node in result_nodes:
                candidates = result_node.getElementsByTagName('subtitle')
                if candidates:
                    subtitle_nodes = candidates
                    break
            # Only the first root that has <results> is ever inspected.
            break
        if not subtitle_nodes:
            return []

        movies = []
        for node in subtitle_nodes:
            try:
                sub_obj = subtitlefile.SubtitleFile(online=True)
                sub = {}

                # NOTE(review): this bare return makes the whole call yield
                # None and drops every movie collected so far -- confirm that
                # callers rely on it.
                if node.getElementsByTagName('EpisodeName'):
                    return

                id_node = first_element(node, 'IDSubtitle')
                if id_node is not None:
                    sub['IDSubtitle'] = describe(
                        id_node, 'IDSubtitle',
                        ('Link', 'LinkImdb', 'DownloadLink', 'uuid'))
                    sub_obj.setIdOnline(sub['IDSubtitle']['IDSubtitle'])

                file_id_node = first_element(node, 'IDSubtitleFile')
                if file_id_node is not None:
                    sub['IDSubtitleFile'] = describe(
                        file_id_node, 'IDSubtitleFile', ())
                    sub_obj.setIdFileOnline(
                        sub['IDSubtitleFile']['IDSubtitleFile'])

                user_node = first_element(node, 'UserID')
                if user_node is not None:
                    sub['UserID'] = describe(user_node, 'UserID', ('Link',))

                if copy_text(node, 'UserNickName', sub):
                    sub_obj._uploader = sub['UserNickName']

                movie_id_node = first_element(node, 'MovieID')
                if movie_id_node is not None:
                    sub['MovieID'] = describe(
                        movie_id_node, 'MovieID', ('Link', 'LinkImdb'))

                for tag in ('MovieThumb', 'LinkUseNext', 'LinkZoozle',
                            'LinkTorrentbar', 'LinkBoardreader', 'MovieName',
                            'MovieYear'):
                    copy_text(node, tag, sub)

                rating_node = first_element(node, 'MovieImdbRating')
                if rating_node is None:
                    # An entry without a <MovieImdbRating> element is dropped
                    # entirely (this used to happen through a swallowed
                    # IndexError).
                    continue
                if rating_node.firstChild:
                    sub['MovieImdbRating'] = rating_node.firstChild.data
                else:
                    # Element present but empty: the rating defaults to 0.
                    sub['MovieImdbRating'] = 0

                copy_text(node, 'MovieImdbID', sub)

                comment_node = first_element(node, 'SubAuthorComment')
                if comment_node is not None:
                    # Falls back to the child node itself (None for an empty
                    # element) when that child exposes no ``data``.
                    comment_child = comment_node.firstChild
                    sub['SubAuthorComment'] = getattr(
                        comment_child, 'data', comment_child)

                language_node = first_element(node, 'ISO639')
                if language_node is not None:
                    sub['ISO639'] = describe(
                        language_node, 'ISO639', ('LinkSearch', 'flag'))
                    sub_obj.setLanguage(
                        Language.from_xx(sub['ISO639']['ISO639']))
                    # Downloading needs the subtitle id, not the subtitle
                    # file id.
                    sub_obj.setExtraInfo(
                        'downloadLink',
                        "http://www.opensubtitles.org/download/sub/%s" % sub_obj.getIdOnline())

                copy_text(node, 'LanguageName', sub)
                if copy_text(node, 'SubFormat', sub):
                    sub_obj.setExtraInfo('format', sub['SubFormat'])
                if copy_text(node, 'SubSumCD', sub):
                    sub_obj.setExtraInfo('totalCDs', sub['SubSumCD'])
                copy_text(node, 'SubAddDate', sub)
                copy_text(node, 'SubBad', sub)
                if copy_text(node, 'SubRating', sub):
                    sub_obj.setRating(sub['SubRating'])
                if copy_text(node, 'SubDownloadsCnt', sub):
                    sub_obj.setExtraInfo(
                        'totalDownloads', sub['SubDownloadsCnt'])
                for tag in ('SubMovieAka', 'SubDate', 'SubComments',
                            'TotalSubs', 'Newest'):
                    copy_text(node, tag, sub)

                if sub:
                    candidate = Movie(sub)
                    # Only subtitles that carry extra info are attached.
                    attach = bool(getattr(sub_obj, "_extraInfo", None))
                    already_listed = False
                    for listed in movies:
                        if listed.MovieId == candidate.MovieId:
                            already_listed = True
                            if attach:
                                listed.subtitles.append(sub_obj)
                    if not already_listed:
                        if attach:
                            candidate.subtitles.append(sub_obj)
                        movies.append(candidate)

            except IndexError:
                # Any IndexError raised while building an entry silently
                # discards that entry and moves on to the next one.
                continue
        return movies
示例#2
0
    def _xml_to_subtitles(self, xml):
        """Turn a search-result XML payload into subtitle objects.

        ``self._extract_subtitle_entries(xml)`` provides the DOM entries to
        convert together with two counters that are passed through unchanged.
        Entries containing an ``ads1``/``ads2`` element are skipped. An entry
        with a missing or malformed mandatory field is logged as a warning
        and left out of the result.

        Returns:
            ``(None, None, None)`` when no entries could be extracted,
            otherwise ``(subtitles, nb_so_far, nb_provider)`` where
            ``subtitles`` is a list of ``OpenSubtitlesSubtitleFile``.
        """
        entries, nb_so_far, nb_provider = self._extract_subtitle_entries(xml)
        if entries is None:
            return None, None, None

        def required(node, tag):
            # First ``tag`` element below ``node``; IndexError when absent,
            # which the per-entry handler below treats as invalid XML.
            return node.getElementsByTagName(tag)[0]

        def optional_text(node, tag, default):
            # Text of the first ``tag`` element, or ``default`` when the
            # element is missing or empty.
            try:
                return required(node, tag).firstChild.data
            except (AttributeError, IndexError):
                return default

        subtitles = []
        for entry in entries:
            try:
                # Advertisement placeholders are not subtitles.
                if (entry.getElementsByTagName('ads1')
                        or entry.getElementsByTagName('ads2')):
                    continue

                id_element = required(entry, 'IDSubtitle')
                subtitle_id = id_element.firstChild.data
                link = ('http://www.opensubtitles.org'
                        + id_element.getAttribute('Link'))
                uuid = id_element.getAttribute('uuid')

                file_id = required(entry, 'IDSubtitleFile').firstChild.data

                user_id = int(required(entry, 'UserID').firstChild.data)
                nickname = optional_text(entry, 'UserNickName', None)

                iso639 = required(entry, 'ISO639').firstChild.data

                extension = optional_text(entry, 'SubFormat', 'srt')

                # The upload date is read from the ``locale`` attribute, not
                # from the element text.
                added_text = required(entry, 'SubAddDate').getAttribute('locale')
                added = datetime.datetime.strptime(
                    added_text, '%d/%m/%Y %H:%M:%S')

                rating = float(required(entry, 'SubRating').firstChild.data)

                language = Language.from_xx(iso639)

                release_name = required(
                    entry, 'MovieReleaseName').firstChild.data
                filename = '{}.{}'.format(release_name, extension)

                download_link = (
                    'http://www.opensubtitles.org/download/sub/{}'.format(
                        subtitle_id))

                # Prefer the nickname, fall back to a non-zero user id.
                if nickname:
                    uploader = nickname
                else:
                    uploader = str(user_id) if user_id != 0 else None

                subtitles.append(OpenSubtitlesSubtitleFile(
                    filename=filename,
                    file_size=None,
                    md5_hash=uuid,
                    id_online=file_id,
                    download_link=download_link,
                    link=link,
                    uploader=uploader,
                    language=language,
                    rating=rating,
                    age=added))
            except (AttributeError, IndexError, ValueError):
                log.warning('subtitle_entry={}'.format(entry.toxml()))
                log.warning('XML entry has invalid format.',
                            exc_info=sys.exc_info())

        return subtitles, nb_so_far, nb_provider
示例#3
0
    def subtitle_info(self, raw_xml):
        """Parse a subtitle-details XML document into plain dictionaries.

        The first ``<opensubtitles>`` element that contains ``<SubBrowse>``
        is used, and within it the first ``<SubBrowse>`` that contains
        ``<Subtitle>`` children. Every ``<Subtitle>`` becomes one dictionary
        holding the fields that are present in the XML.

        Side effects: for each entry that has a ``<SubtitleFile>`` element the
        module-level global ``OnlyLink`` is overwritten with a rewritten
        bundle download link and ``Link().OneLink(OnlyLink)`` is called.

        Entries without ``<SubtitleFile>`` used to raise ``KeyError`` (or to
        reuse a stale/undefined ``OnlyLink``); they are now kept without a
        ``'DownloadLink'`` key and do not trigger ``Link().OneLink``.

        Returns:
            ``[]`` when no ``<Subtitle>`` element is found, otherwise the
            tuple ``(subtitle_entries, sub_obj)`` where ``sub_obj`` is the
            ``SubtitleFile`` built for the *last* entry.
            NOTE(review): the two return shapes differ (list vs. tuple);
            kept as-is because callers are not visible here.
        """
        global OnlyLink

        def first_element(node, tag):
            # First descendant element called ``tag``, or None when absent.
            matches = node.getElementsByTagName(tag)
            return matches[0] if matches else None

        def copy_text(node, tag, target):
            # Store the text of the first ``tag`` element under target[tag];
            # return False (storing nothing) when it is missing or empty.
            element = first_element(node, tag)
            if element is None or not element.firstChild:
                return False
            target[tag] = element.firstChild.data
            return True

        dom = minidom.parseString(raw_xml)

        # Locate the list of <Subtitle> nodes to convert.
        subtitle_nodes = None
        for root in dom.getElementsByTagName('opensubtitles'):
            browse_nodes = root.getElementsByTagName('SubBrowse')
            if not browse_nodes:
                continue
            for browse_node in browse_nodes:
                found = browse_node.getElementsByTagName('Subtitle')
                if found:
                    subtitle_nodes = found
                    break
            # Only the first root that has <SubBrowse> is ever inspected.
            break
        if not subtitle_nodes:
            return []

        subtitle_entries = []
        for entry in subtitle_nodes:
            sub_obj = subtitlefile.SubtitleFile(online=True)
            sub = {}

            copy_text(entry, 'LinkDetails', sub)

            id_node = first_element(entry, 'IDSubtitle')
            if id_node is not None:
                sub['IDSubtitle'] = {
                    'IDSubtitle': id_node.firstChild.data,
                    'Link': id_node.getAttribute('Link'),
                }
                sub_obj._onlineId = sub['IDSubtitle']['IDSubtitle']

            copy_text(entry, 'MovieReleaseName', sub)
            if copy_text(entry, 'SubFormat', sub):
                sub_obj.setExtraInfo('format', sub['SubFormat'])
            if copy_text(entry, 'SubSumCD', sub):
                sub_obj.setExtraInfo('totalCDs', sub['SubSumCD'])
            copy_text(entry, 'SubAuthorComment', sub)
            copy_text(entry, 'SubAddDate', sub)
            copy_text(entry, 'SubSumVotes', sub)
            if copy_text(entry, 'SubRating', sub):
                sub_obj.setRating(sub['SubRating'])
            if copy_text(entry, 'SubDownloadsCnt', sub):
                sub_obj.setExtraInfo('totalDownloads', sub['SubDownloadsCnt'])
            if copy_text(entry, 'UserNickName', sub):
                sub_obj._uploader = sub['UserNickName']

            language_node = first_element(entry, 'LanguageName')
            if language_node is not None and language_node.firstChild:
                sub['LanguageName'] = language_node.firstChild.data
                # The language code lives in the element's ISO639 attribute.
                sub_obj.setLanguage(Language.from_xx(
                    language_node.getAttribute('ISO639')))

            file_container = first_element(entry, 'SubtitleFile')
            if file_container is not None:
                file_node = file_container.getElementsByTagName('File')[0]
                cd_node = file_node.getElementsByTagName('SubActualCD')[0]
                download_node = file_container.getElementsByTagName(
                    'Download')[0]
                bundle_link = download_node.getAttribute('LinkDownloadBundle')
                sub['SubtitleFile'] = {
                    'File': {
                        'ID': file_node.getAttribute('ID'),
                        'SubActualCD': {
                            'SubActualCD': cd_node.firstChild.data,
                            # NOTE(review): 'SubSize' is filled from the
                            # 'Link' attribute -- looks like a copy/paste
                            # slip, left unchanged pending confirmation.
                            'SubSize': cd_node.getAttribute('Link'),
                            'MD5': cd_node.getAttribute('MD5'),
                            'SubFileName': cd_node.getAttribute('SubFileName'),
                            'DownloadLink': cd_node.getAttribute('DownloadLink'),
                        },
                    },
                    'Download': {
                        'Download': download_node.firstChild.data,
                        'DownloadLink': bundle_link,
                    },
                }
                # Rewrite the bundle link by plain substring replacement.
                OnlyLink = bundle_link.replace('dl', 'www').replace(
                    'org/en', 'com').replace('subb', 'sub')

            movie_node = first_element(entry, 'Movie')
            if movie_node is not None:
                name_node = movie_node.getElementsByTagName('MovieName')[0]
                sub['MovieID'] = {
                    'MovieID': name_node.getAttribute('MovieID'),
                    'Link': name_node.getAttribute('Link'),
                }
                # Pick the IMDb link out of the "about" section, if any.
                for section in movie_node.getElementsByTagName('section'):
                    if section.getAttribute('type') != u"about":
                        continue
                    for info in section.getElementsByTagName("info"):
                        if info.getElementsByTagName("web_url")[0].firstChild.data == u"http://www.imdb.com":
                            sub['MovieID']['LinkImdb'] = info.getElementsByTagName(
                                "link_detail")[0].firstChild.data

            copy_text(entry, 'FullName', sub)
            copy_text(entry, 'ReportLink', sub)

            if 'SubtitleFile' in sub:
                # Shortcut to the per-file download link. Guarded so that an
                # entry lacking <SubtitleFile> neither raises KeyError nor
                # reuses an OnlyLink left over from a previous entry.
                sub['DownloadLink'] = sub['SubtitleFile']['File']['SubActualCD']['DownloadLink']
                Link().OneLink(OnlyLink)
            if sub:
                subtitle_entries.append(sub)
        return (subtitle_entries, sub_obj)
示例#4
0
def translators_get():
    """Return the known translators as a one-element tuple of ``Translator``.

    The single entry is built from a name, an e-mail address and a tuple of
    the languages translated (here only the language for code ``'fr'``).
    """
    # Imported at call time rather than at module import time; presumably to
    # avoid a circular import -- TODO confirm.
    from subdownloader.languages.language import Language

    french = Language.from_xx('fr')
    sylvestre = Translator('Sylvestre Ledru', '*****@*****.**', (french,))
    return (sylvestre,)