示例#1
0
文件: views.py 项目: hgeg/torrentor
def subtitles(request,path):
  """Return the location of an English subtitle file for the media at ``path``.

  The first ``/``-separated component of ``path`` is used to look up a
  ``Process`` row. If that row already has a subtitle recorded
  (``subp == 1``) its ``mlink`` is returned. Otherwise OpenSubtitles is
  searched, the first hit is handed to ``createFile`` and the resulting
  location is stored on the row.

  Returns:
    A plain string path in every branch except the final fallback, where the
    path is wrapped in an ``HttpResponse`` when ``request`` is not ``None``.
    NOTE(review): the mixed return types look unintentional - confirm what the
    callers expect before unifying them.
  """
  try:
    proc = Process.objects.filter(name=path.split('/')[0])[0]
  except Exception:
    # No matching process (or the lookup itself failed).
    return '/archive/notFound.vtt'
  if proc.subp == 1:
    return proc.mlink
  try:
    opsub = OpenSubtitles()
    # NOTE(review): credentials are hard-coded in source; they should be moved
    # to configuration and rotated.
    opsub.login('hgeg','sokoban')
  except Exception:
    return '/archive/error.vtt'

  subsFile = "/archive/internal.vtt"
  try:
    data = guessFileData(path)
    query = {'sublanguageid':'eng','query':data['name']}
    if data['type'] == 'tvshow':
      # Narrow the search to the exact episode for TV shows.
      query['season'] = data['season']
      query['episode'] = data['episode']
    sub = opsub.search_subtitles([query])[0]
    subsFile = createFile(path,sub['ZipDownloadLink'])
    proc.subp = 1
    proc.mlink = subsFile
    proc.save()
    return proc.mlink
  except Exception:
    # Best effort: any failure falls through to the fallback below. subsFile
    # is either the default or the file created just before the failure.
    pass

  if request is not None:
    return HttpResponse(str(subsFile))
  return subsFile
示例#2
0
def download_all_subtitles(filepath):
    """Download subtitles in every available language for ``filepath``.

    Subtitles matched by the file's hash are preferred. Results matched only
    by file name are added for languages the hash search did not cover.
    Files are written next to the video as ``<basename>.<ISO639>.srt``.

    Downloads are issued in chunks of 10 with a 10 second pause between
    chunks. A chunk that fails with ``ProtocolError`` is retried up to 5
    times, each time after a 10 second pause and a fresh anonymous login.

    Raises:
        ProtocolError: if a chunk still fails after all retries.
    """
    dirname = os.path.dirname(filepath)
    basename_without_ext = os.path.splitext(os.path.basename(filepath))[0]

    ost = OpenSubtitles()
    ost.login(None, None)

    movie_hash = File(filepath).get_hash()
    results_from_hash = ost.search_subtitles([{
        "sublanguageid": "all",
        "moviehash": movie_hash
    }]) or []
    languages_from_hash = {r.get("SubLanguageID") for r in results_from_hash}

    # The search may return a falsy value when nothing matches; guard it the
    # same way as the hash search so the comprehension below cannot crash.
    results_from_filename = ost.search_subtitles([{
        "sublanguageid": "all",
        "query": basename_without_ext
    }]) or []
    results_from_filename_only = [
        r for r in results_from_filename
        if r.get("SubLanguageID")
        and r.get("SubLanguageID") not in languages_from_hash
    ]

    results = results_from_hash + results_from_filename_only
    for chunk_index, chunk in enumerate(_chunks(results, 10)):
        sub_ids = {
            r["IDSubtitleFile"]: f'{basename_without_ext}.{r["ISO639"]}.srt'
            for r in chunk
        }

        if chunk_index:
            # Pause between chunks.
            time.sleep(10)

        retries = 5
        while True:
            try:
                ost.download_subtitles(
                    list(sub_ids),
                    override_filenames=sub_ids,
                    output_directory=dirname,
                    extension="srt",
                )
                break
            except ProtocolError:
                if retries == 0:
                    raise
                retries -= 1
                time.sleep(10)
                # Start a new session before retrying.
                ost = OpenSubtitles()
                ost.login(None, None)
示例#3
0
class OpenSubtitlesModel(object):
    """Thin wrapper around an ``OpenSubtitles`` client for English subtitles.

    Every public call logs in again first, using the ``doctest`` account.
    """

    def __init__(self):
        self.ost = OpenSubtitles('en')

    def _refresh_auth(self):
        """Log in again and return whatever ``login`` returns."""
        return self.ost.login('doctest', 'doctest')

    def search_by_name_get_first_n(self, name, n):
        """Search English subtitles by ``name`` and return the first ``n`` hits.

        Returns:
            A list with at most ``n`` result entries, or ``None`` when the
            search did not return a non-empty list.
        """
        self._refresh_auth()

        search_result = self.ost.search_subtitles([{
            'sublanguageid': 'eng',
            'query': name
        }])
        if isinstance(search_result, list) and search_result:
            return search_result[0:n]
        return None

    def search_by_name_get_most_fit(self, name):
        """Return the first search hit for ``name``, or ``None`` if there is none."""
        first_hits = self.search_by_name_get_first_n(name, 1)
        # Without this guard a search with no results raised TypeError here.
        if not first_hits:
            return None
        return first_hits[0]

    def download_by_file_id(self, id_subtitle_file):
        """Download one subtitle file by its id and return the path it was saved to.

        The path is taken from the single-entry mapping returned by
        ``download_subtitles``. No output directory is passed; the original
        author noted that files end up at the project root.
        """
        self._refresh_auth()

        path_to_files_by_id_dict = self.ost.download_subtitles(
            [id_subtitle_file])
        # Single-entry dict: take its only value.
        return list(path_to_files_by_id_dict.values())[0]
示例#4
0
class OST:
    """Find the zip download link of the most downloaded ``srt`` subtitle."""

    def __init__(self):
        self.api = OpenSubtitles()

    def find_max_dl_count(self, **kwargs):
        """Return the index of the most downloaded matching subtitle.

        Keyword Args:
            data: sequence of search result dicts with the keys
                ``IDMovieImdb``, ``SubFormat`` and ``SubDownloadsCnt``.
            imdb_id: IMDb id string; leading/trailing ``t`` characters are
                stripped before the numeric comparison.

        Returns:
            Index into ``data`` of the ``srt`` entry for ``imdb_id`` with the
            highest download count (the first one on ties), or ``0`` when no
            entry matches.
        """
        wanted_id = int(kwargs['imdb_id'].strip('t'))
        target = 0
        # Track the best *matching* count. Comparing against data[0] let a
        # non-matching first entry shadow every matching one.
        best_count = -1
        for i, e in enumerate(kwargs['data']):
            if int(e['IDMovieImdb']) != wanted_id or e['SubFormat'] != 'srt':
                continue
            count = int(e['SubDownloadsCnt'])
            if count > best_count:
                best_count = count
                target = i

        return target

    def link(self, **kwargs):
        """Search subtitles and return the best entry's ``ZipDownloadLink``.

        Keyword Args:
            query: free-text search query.
            sublanid: value passed as ``sublanguageid``.
            imdb_id: IMDb id used to pick among the results.

        Returns:
            The link string, or ``None`` when the search returns nothing.
        """
        self.api.login("doctest", 'doctest')
        data = self.api.search_subtitles([{
            'query': kwargs['query'],
            'sublanguageid': kwargs['sublanid']
        }])
        # Check for "no results" before the data is used.
        if not data:
            return None

        # find_max_dl_count accepts keyword arguments only.
        target = self.find_max_dl_count(data=data, imdb_id=kwargs['imdb_id'])
        return data[target]['ZipDownloadLink']
示例#5
0
def main():
    """Download the most popular English subtitle for a video file.

    The file path comes from the command line. The user is prompted for
    OpenSubtitles credentials, subtitles are searched by file hash and size,
    and the entry with the highest download count is fetched as a gzip
    archive and unpacked to ``<video path without extension>.srt``.

    Exits with a message when the file does not exist or nothing is found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "file",
        help="Path to the file you want to download subtitles for",
        type=str)

    args = parser.parse_args()

    if not os.path.exists(args.file):
        sys.exit("File does not exist")

    # Derive both output paths from the absolute video path. Joining the
    # directory with the unsplit argument duplicated the directory part.
    video_path = os.path.abspath(args.file)
    base_path = os.path.splitext(video_path)[0]
    gz_path = base_path + ".srt.gz"
    srt_path = base_path + ".srt"
    f = File(video_path)

    ost = OpenSubtitles()

    # NOTE(review): this line was garbled in the source; the two prompts and
    # the status message are reconstructed from the surviving string literals.
    username = input("Please input your OpenSubtitles.org username: ")
    password = input("Please input your OpenSubtitles.org password: ")
    print("Logging in, please wait...")
    ost.login(username, password)
    print("Successfully logged in!")

    movie_hash = f.get_hash()
    size = f.size

    print("Scouring the web for subtitles, please wait...")
    data = ost.search_subtitles([{
        'sublanguageid': 'eng',
        'moviehash': movie_hash,
        'moviebytesize': size
    }])
    if not data:
        sys.exit("No subtitles found")

    # Pick the entry with the highest download count. The original loop
    # assigned in the wrong direction and never updated its running maximum.
    bestdic = max(data, key=lambda entry: int(entry.get('SubDownloadsCnt') or 0))

    print("Subtitle found in", bestdic.get('SubLanguageID'), "with",
          bestdic.get('SubDownloadsCnt'), "downloads.")
    print("Downloading subtitles, please wait...")
    urllib.request.urlretrieve(bestdic.get('SubDownloadLink'), gz_path)
    print("Subtitle downloaded!")
    print("Unzipping subtitle, please wait...")
    try:
        with gzip.open(gz_path, "rb") as compressed, \
                open(srt_path, "wb") as extracted:
            extracted.write(compressed.read())
    finally:
        # Remove the archive even when extraction fails.
        os.remove(gz_path)
    print("Done!")
示例#6
0
def main():
    """List available subtitles for one season of a show, ``ls -l`` style.

    Searches OpenSubtitles anonymously for the show, season and language given
    on the command line, orders the hits by episode number, keeps the entries
    whose ``MovieName`` contains the quoted show name, and prints them as a
    table preceded by a ``total N`` line.
    """
    parser = argparse.ArgumentParser(description="List available subtitles for a show")
    parser.add_argument("show", help="Show you want to list the subtitles for")
    parser.add_argument("-s", "--season", help="Season you want to list", type=int, default=1)
    parser.add_argument("-l", "--language", help="Language you want to list subtitles in", default='en')
    args = parser.parse_args()

    # Named "client" so the local does not shadow the stdlib "os" module name.
    client = OpenSubtitles()
    client.login(username=None, password=None)

    show_name = args.show
    sub_language = args.language
    season = str(args.season)

    # Treat a falsy search result as "no subtitles".
    results = client.search_subtitles(
        [{'sublanguageid': sub_language, 'query': show_name, 'season': season}]) or []
    sorted_results = sorted(results, key=lambda x: int(x['SeriesEpisode']))

    # The show name is user input: escape it so regex metacharacters in a
    # title are matched literally. The name must appear in double quotes.
    exact_show_name = re.compile('"{}"'.format(re.escape(show_name)))

    table = []
    for result in sorted_results:
        name = result['MovieName'].strip()
        if not exact_show_name.search(name):
            continue
        username = result['UserNickName'] or 'nobody'
        date = datetime.strptime(result['SubAddDate'], '%Y-%m-%d %H:%M:%S')
        table.append(['-r--r--r--', '1', username, username, result['SubSize'],
                      date.strftime('%b %d %H:%M'), result['SubFileName']])

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("total {}".format(len(table)))
    print(tabulate(table, tablefmt="plain"))
示例#7
0
class subtitleFinder:
    """Look up subtitle download links on OpenSubtitles by IMDb id.

    Args:
        username: account name used for ``login`` when no token is given.
        password: account password used for ``login`` when no token is given.
        token: existing session token; when provided it is assigned to the
            client directly and no login is performed (untested).
    """

    def __init__(self, username="", password="", token=""):
        self.subs = OpenSubtitles()
        if token:
            self.subs.token = token  # Set token to that provided -- untested
        else:
            self.subs.login(username, password)

    # Abridged example of one entry in a search result list. The full sample
    # dump that used to live here spilled onto uncommented lines and broke
    # the module's syntax:
    #   {'MatchedBy': 'imdbid', 'IDSubtitleFile': '1952931532',
    #    'SubFileName': 'Person.of.Interest.S01E01.HDTV.en.srt',
    #    'SubLanguageID': 'eng', 'SubFormat': 'srt',
    #    'SubDownloadsCnt': '166380', 'IDMovieImdb': '1941917',
    #    'MovieName': '"Person of Interest" Pilot',
    #    'SeriesSeason': '1', 'SeriesEpisode': '1',
    #    'SubDownloadLink': 'http://dl.opensubtitles.org/.../1952931532.gz',
    #    'ZipDownloadLink': 'http://dl.opensubtitles.org/.../subad/4242217',
    #    'Score': 34.6638}

    def _search(self, imdb_id, language="eng"):
        """Return the raw search result for a single IMDb id."""
        return self.subs.search_subtitles([{
            "sublanguageid": language,
            "imdbid": imdb_id
        }])

    def _search_multiple(self, imdb_ids, language="eng"):
        """Return the raw search result for several IMDb ids in one request."""
        request = [
            {"sublanguageid": language, "imdbid": imdb_id}
            for imdb_id in imdb_ids
        ]
        return self.subs.search_subtitles(request)

    def get_sub_link(self, imdb_id, language="eng"):
        """Return the ``SubDownloadLink`` of the top-ranked subtitle.

        Ranking is delegated to ``sortby(result, "SubDownloadsCnt")``, which
        is defined outside this class (presumably most downloaded first -
        verify against its definition).

        Returns:
            The link string, or ``None`` when the search returns nothing.
        """
        result = self._search(imdb_id, language)
        if not result:
            return None
        ranked = sortby(result, "SubDownloadsCnt")
        return ranked[0]["SubDownloadLink"]

    def get_sub_links(self, imdb_ids, language="eng"):
        """Return ``{IDMovieImdb: SubDownloadLink}`` for several IMDb ids.

        Only ids that produced at least one search result appear in the
        returned mapping.
        """
        results = self._search_multiple(imdb_ids, language)
        separated = self._separate_episodes(imdb_ids, results)
        return {
            key: sortby(subs, "SubDownloadsCnt")[0]["SubDownloadLink"]
            for key, subs in separated.items()
        }

    def _separate_episodes(self, imdb_ids, array):
        """Group result entries by their ``IDMovieImdb`` value.

        ``imdb_ids`` is accepted for interface compatibility but is not used.
        A falsy ``array`` yields an empty mapping.
        """
        separated = {}
        for sub in array or []:
            separated.setdefault(sub["IDMovieImdb"], []).append(sub)
        return separated
示例#8
0
def main(video):
    """Download a subtitle for ``video`` from OpenSubtitles, matched by file hash.

    Logs in with the module-level credentials, hashes ``video_path + video``,
    searches all languages by that hash and tries to download the first
    result as ``<video>.srt`` into ``video_path``. If that download raises,
    the second result (when there is one) is tried once. Progress and
    failures are reported through ``log``; the function returns ``None`` in
    every case.
    """
    log("info", "Loaded module subtitles")
    # Logging in to OST
    log("info", "Logging in to OpenSubtitles")
    ost = OpenSubtitles()
    token = ost.login(opensubtitles_username, opensubtitles_password)
    if isinstance(token, str):
        log("success", "Logged in to OpenSubtitles")
    else:
        log("critical", "Invalid username / password entered")
        return

    # Opening File
    file_path = video_path + video
    if not path.exists(file_path):
        log("critical", "Could not find specified video file '" + file_path + "'")
        return

    # Hashing file
    log("info", "Generating video hash...")
    file_hash = File(file_path).get_hash()
    log("success", "File hash generated: " + file_hash)

    # Searching OST
    log("info", "Querying OpenSubtitles for subtitles...")
    data = ost.search_subtitles([{'sublanguageid': 'all', 'moviehash': file_hash}])
    if not data:
        log("warning", "No results found.")
        # TODO: Implement series and episode-based downloading
        return
    log("success", f"Found {len(data)} results.")

    # Try the first result, then fall back to the second one if the download
    # raises. Slicing keeps this safe when only one result exists; indexing
    # data[1] directly raised IndexError from inside the error handler.
    candidates = data[:2]
    for attempt, entry in enumerate(candidates):
        subtitle_id = entry["IDSubtitleFile"]
        log("info", "Attempting download of subtitles with ID " + str(subtitle_id))
        try:
            downloaded = ost.download_subtitles(
                [subtitle_id],
                override_filenames={subtitle_id: video + '.srt'},
                output_directory=video_path,
                extension='srt')
        except Exception:
            if attempt + 1 < len(candidates):
                log("error", "Something went wrong. Trying second option in list(?)")
                continue
            log("critical", "giving up after too many tries")
            return

        # A non-dict return value is a failed download and is not retried.
        if isinstance(downloaded, dict):
            log("success", "Subtitles successfully downloaded. Enjoy your video!")
        else:
            log("critical", "Subtitle download failed.")
        return
示例#9
0
def main():
    """Fetch Brazilian Portuguese subtitles for every video under a folder.

    Walks the hard-coded download folder, collects ``.mp4``/``.avi``/``.mkv``
    files, searches OpenSubtitles by file hash and size (language ``pob``)
    and writes the first hit next to the video with an ``.srt`` extension.
    Sleeps one second after each download and logs out at the end.

    NOTE(review): this snippet targets Python 2 (``urllib2``, ``StringIO``).
    """
    folder = "/home/Downloads/torrents"
    video_extensions = (".mp4", ".avi", ".mkv")

    pathList = []
    for root, _dirs, files in os.walk(folder):
        for name in files:
            if name.endswith(video_extensions):
                video = os.path.join(root, name)
                print(video)
                pathList.append(video)

    print("Found %d files" % len(pathList))

    if not pathList:
        return

    ops = OpenSubtitles()
    token = ops.login("", "")
    print(token)
    try:
        for subToFind in pathList:
            f = File(subToFind)

            subData = ops.search_subtitles([{
                'sublanguageid': 'pob',
                'moviehash': f.get_hash(),
                'moviebytesize': f.size,
            }])
            if not subData:
                print("Sub not found for %s " % f.path)
                continue

            # Swap only the extension. str.replace() would also rewrite
            # ".mp4"/".mkv"/".avi" occurring earlier in the path.
            filename = os.path.splitext(os.path.normpath(subToFind))[0] + '.srt'

            response = urllib2.urlopen(subData[0]['SubDownloadLink'])
            try:
                compressedFile = StringIO.StringIO(response.read())
            finally:
                response.close()
            decompressedFile = gzip.GzipFile(fileobj=compressedFile, mode='rb')

            # The decompressed payload is raw bytes: write it in binary mode.
            with open(filename, 'wb') as outfile:
                outfile.write(decompressedFile.read())

            time.sleep(1)
    finally:
        # Always end the session, even when a download fails.
        ops.logout()
示例#10
0
def printmovielist(query, outfile, num=300, check_if_subtitles=True):
    """Write up to ``num`` ``"<imdb id> <title>"`` lines to ``outfile``.

    Result pages are fetched from ``query`` with a ``&start=<index>``
    parameter that advances by 50 per page. Titles that IMDb does not
    classify as ``movie`` are skipped. When ``check_if_subtitles`` is true, a
    title is kept only if OpenSubtitles has subtitles for it in English,
    Spanish and French.

    Args:
        query: listing URL to which ``&start=<index>`` is appended.
        outfile: path of the UTF-8 text file to write.
        num: maximum number of titles to write.
        check_if_subtitles: whether to require subtitles in all three languages.

    The function stops when ``num`` titles have been written or when a page
    contains no titles.
    """
    results = 0
    start_index = 1

    imdb_api = imdb.IMDb()

    ost = None
    if check_if_subtitles:
        ost = OpenSubtitles()
        token = ost.login('doctest', 'doctest')
        print(token)

    # Loop invariants, built once instead of on every page.
    idregex = re.compile("tt([0-9]*)")
    languages = ['eng', 'spa', 'fre']

    with codecs.open(outfile, "w", encoding="utf-8") as out:
        while results < num:
            page = requests.get(query + "&start={0}".format(start_index))
            soup = BeautifulSoup(page.content, "html.parser")
            titles = soup.find_all('span', class_="lister-item-header")
            if not titles:
                # No more listings: without this the loop would request
                # ever-increasing offsets forever.
                break

            # The placeholders were previously printed literally because the
            # values were passed as extra print() arguments.
            print("Revisando resultados del {0} al {1}:".format(
                start_index, start_index + 49))
            for title in titles:
                movielink = title.find('a')
                if movielink is None:
                    continue
                id_match = idregex.search(movielink['href'])
                if id_match is None:
                    continue
                movieid = id_match.group(1)
                movietitle = title.text.replace("\n", " ").strip()

                # Skip results that are not movies.
                movie = imdb_api.get_movie(movieid)
                if movie is None or movie['kind'] != 'movie':
                    continue

                # Skip results lacking subtitles in any language of interest.
                # all() stops at the first missing language.
                if check_if_subtitles and not all(
                        ost.search_subtitles([{
                            'sublanguageid': lang,
                            'imdbid': movieid
                        }])
                        for lang in languages):
                    continue

                print(movieid, movietitle)
                out.write("{0} {1}\n".format(movieid, movietitle))
                results += 1

                if results == num:
                    return

            start_index += 50
示例#11
0
文件: task1.py 项目: hede0724/nlp
def crawler(title):
    """Open the most downloaded Traditional Chinese subtitle for ``title``.

    Searches OpenSubtitles with language id ``zht``, picks the entry with the
    highest ``SubDownloadsCnt`` (the first one on ties), opens its
    ``ZipDownloadLink`` in a new browser tab and prints ``title`` and the
    link separated by a tab. When nothing is found the browser is not opened
    and an empty link is printed.
    """
    ost = OpenSubtitles()
    ost.login("doctest", 'doctest')
    # Treat a falsy search result as "no subtitles".
    data = ost.search_subtitles([{'query': title, 'sublanguageid': 'zht'}]) or []

    link = ''
    if data:
        best = max(data, key=lambda d: int(d['SubDownloadsCnt']))
        link = best['ZipDownloadLink']
        webbrowser.open(link, new=2, autoraise=True)
    print(title, link, sep='\t', flush=True)
示例#12
0
 def download_subtitle(self):
     """
     Downloads subtitles from opensubtitles.org, in the defined language
     and stores them in a tempfile.

     For every ``*.mkv`` file in ``self.watch_path`` a single search request
     carrying two queries (by file hash and by file name) is sent. Only the
     first entry of the returned result is considered.

     Returns a list with one dict per movie:
     ``{"file_path": <movie path>, "sub": <temp file path or None>}``.

     Raises ``Error`` when the login does not succeed.
     """
     ost = OpenSubtitles()
     logged_in = ost.login(self.os_username, self.os_password)
     if not logged_in:
         raise Error("Wrong opensubtitles credentials")
     # TODO refactor to consume path on event, potentially from queue
     mkv_files = list(self.watch_path.glob("*.mkv"))
     # NOTE(review): the original picked the language through a conditional
     # on len(os_language) == 3 whose two branches were identical; a mapping
     # from 2-letter to 3-letter codes may have been intended - confirm.
     language = self.os_language
     subs = list()
     for movie in mkv_files:
         movie_file = File(movie.absolute())
         # search by hash and by name in the same request
         ost_data = ost.search_subtitles([
             {
                 "sublanguageid": language,
                 "moviehash": movie_file.get_hash(),
             },
             {
                 "sublanguageid": language,
                 "query": movie.name,
             },
         ])
         if ost_data:
             #  downloading first subtitle
             plain_link = ost_data[0]["SubDownloadLink"]
             #  rebuilding link to get utf-8 subtitle; only the first
             #  "/download/" is rewritten so the rest of the link is kept
             sub_link = plain_link.replace(
                 "/download/", "/download/subencoding-utf8/", 1)
             response = requests.get(sub_link)
             tmp, tmp_name = tempfile.mkstemp()
             # open() takes ownership of the descriptor and closes it
             with open(tmp, "wb") as srt_out:
                 srt_out.write(gzip.decompress(response.content))
             subs.append({"file_path": movie, "sub": tmp_name})
         else:
             subs.append({"file_path": movie, "sub": None})
     return subs
示例#13
0
def get_subtitle(filename):
    """Search OpenSubtitles for English subtitles matching a video file.

    The file is looked up under the module-level ``source_dir`` and matched
    by hash and size. The search result is printed and returned.

    Returns:
        The search result, or ``None`` when the file does not exist or any
        step fails (the error text is printed in that case).
    """
    os_sub = OpenSubtitles()
    try:
        # NOTE(review): credentials are hard-coded in source; they should be
        # moved to configuration and rotated.
        os_sub.login('thegyro','idontlikepasswords')
        video_path = os.path.join(source_dir,filename)
        if not os.path.exists(video_path):
            raise Exception("File doesn't exist")

        video_file = File(video_path)
        # 'sublanguageid' takes a language code; 'eng' is what every other
        # search in this code base passes (the original sent 'English').
        sub_param = {
            'sublanguageid': 'eng',
            'moviehash': video_file.get_hash(),
            'moviebytesize': video_file.size,
        }
        subtitles = os_sub.search_subtitles([sub_param])
        print(subtitles)
        return subtitles
    except Exception as ex:
        print(str(ex))
        return None
示例#14
0
def downloadSubs(Parameters):
    """Download the first matching English subtitle for a video and save it as an SRT file.

    ``Parameters`` supplies ``username``, ``password``, ``path``, ``video`` and
    ``subtitle``. The video at ``path/video`` is searched by hash and size; the
    first hit is downloaded, base64-decoded into ``path/subtitle + '.gz'``,
    unpacked into ``path/subtitle``, and the ``.gz`` file is then deleted.

    Exits the process via ``sys.exit()`` when the login returns no token.
    Prints a notice and returns when the search yields nothing.
    """
    opensubs = OpenSubtitles()
    token = opensubs.login(Parameters.username, Parameters.password)
    if token is None:
        print('\n*** Login failed! ***\n')
        sys.exit()

    # Hash and size of the video identify it in the subtitle database.
    video = File(os.path.join(Parameters.path, Parameters.video))
    print('\tPath: %s' % Parameters.path)
    print('\tFile: %s' % Parameters.video)
    movie_hash = video.get_hash()
    movie_size = video.get_size()

    data = opensubs.search_subtitles([{'sublanguageid': 'eng', 'moviehash': movie_hash, 'moviebytesize': movie_size}])
    if not data:
        print('*** No match found for file! ***')
        return

    # The first search result is taken as the best match.
    download = opensubs.download_subtitles([data[0]['IDSubtitleFile']])
    data_decoded = base64.b64decode(unicode(download[0]['data']))
    srt_file_name = os.path.join(Parameters.path, Parameters.subtitle)
    gz_file = srt_file_name + '.gz'

    # Binary mode throughout: the payload is gzip data, and text mode would
    # rewrite newline bytes on platforms that translate them.
    print('\nCreating gz file: %s' % gz_file)
    with open(gz_file, 'wb') as download_file:
        download_file.write(data_decoded)
    print('Created gz file: %s' % gz_file)

    print('Opening gz file: %s' % gz_file)
    with gzip.open(gz_file, 'rb') as srt_file_buffer:
        print('Creating SRT file: %s' % srt_file_name)
        with open(srt_file_name, 'wb') as srt_file:
            srt_file.write(srt_file_buffer.read())
    print('Created SRT file: %s' % srt_file_name)

    # The gzip reader is closed by now, so the removal cannot hit an open handle.
    print('Deleting %s' % gz_file)
    os.remove(gz_file)
示例#15
0
文件: subdl.py 项目: agurka/subdl
def main():
    """Download subtitles for every entry of ``get_list()`` that is not already present.

    An entry is skipped when the file name from ``get_filename`` already exists
    in the current working directory or when the search finds nothing. The
    remaining subtitles are downloaded one by one under their computed names.
    """
    ost = OpenSubtitles()
    ost.login(LOGIN, PASSWORD)

    # Snapshot the directory once; nothing is written until the download loop.
    existing = set(os.listdir())
    id_list = []
    filenames = {}
    for sub in get_list():
        sub_name = get_filename(sub)
        if sub_name in existing:
            continue
        data = ost.search_subtitles([sub])
        # The search may give None as well as an empty list (other callers in
        # this file check for both), so test truthiness rather than len().
        if not data:
            continue
        sub_id = data[0]["IDSubtitleFile"]
        id_list.append(int(sub_id))
        # Keyed by the id exactly as returned by the search.
        filenames[sub_id] = sub_name

    for sub_id in id_list:
        ost.download_subtitles([sub_id], override_filenames=filenames)
class CommonMetadataOpenSubtitles(object):
    """
    Thin wrapper around an OpenSubtitles client that logs in on construction.
    """

    def __init__(self, user_name, user_password):
        """Create the client and keep the value returned by ``login`` as ``token``."""
        client = OpenSubtitles()
        self.opensubtitles_inst = client
        self.token = client.login(user_name, user_password)

    def com_meta_opensub_search(self, file_name):
        """Search all languages for subtitles matching the hash and size of ``file_name``."""
        media = File(file_name)
        criteria = {
            'sublanguageid': 'all',
            'moviehash': media.get_hash(),
            'moviebytesize': media.size,
        }
        return self.opensubtitles_inst.search_subtitles([criteria])

    def com_meta_opensub_ping(self):
        """Send a no-operation request; the reply is discarded."""
        client = self.opensubtitles_inst
        client.no_operation()

    def com_meta_opensub_logoff(self):
        """Log the client out; the reply is discarded."""
        client = self.opensubtitles_inst
        client.logout()
示例#17
0
def main():
    """Log in interactively, then download English subtitles for every video found.

    Credentials are requested again until ``login`` returns something other
    than None. For each video object the subtitle picked by
    ``find_most_downloaded`` is saved to ``.\\Subs\\`` as
    ``<file_name>-eng.srt``. The session is logged out at the end.
    """
    username, password = get_login_credentials()
    ost = OpenSubtitles()

    print("Logging in...")
    while True:
        if ost.login(username, password) is not None:
            break
        print("Login failed, try again.")
        username, password = get_login_credentials()

    # Prepare the output directory and collect the videos to process.
    create_subs_dir()
    videos = to_video_object_list(get_video_filenames())

    print("Downloading all subs...")
    for video in videos:
        print("".join((
            "Downloading ", video.search_name,
            " S", video.season,
            " E", video.episode,
            "Subs",
        )))
        criteria = {
            'sublanguageid': 'eng',
            'query': video.search_name.lower(),
            'season': video.season,
            'episode': video.episode,
        }
        # Choose which of the search results to download.
        best_id = find_most_downloaded(ost.search_subtitles([criteria]))
        ost.download_subtitles(
            [best_id],
            output_directory='.\\Subs\\',
            override_filenames={str(best_id): video.file_name + '-eng.srt'},
            extension='srt',
        )

    print("Done!")

    print("Logging out...")
    ost.logout()
示例#18
0
def download_all_subtitles(filepath):
    """Download every subtitle matching the hash of ``filepath`` next to the video.

    Results are split with ``_chunks(results, 20)`` and each chunk is
    downloaded in one request. Files are named
    ``<video name without extension>.<SubLanguageID>.srt``, so two results in
    the same language map to the same file name.
    """
    dirname = os.path.dirname(filepath)
    basename_without_ext = os.path.splitext(os.path.basename(filepath))[0]
    ost = OpenSubtitles()
    ost.login(None, None)
    movie_hash = File(filepath).get_hash()
    # The search may give None instead of a list; treat that as "no results"
    # so _chunks never receives None.
    results = ost.search_subtitles([{"sublanguageid": "all", "moviehash": movie_hash}]) or []
    for chunk in _chunks(results, 20):
        sub_ids = {
            r["IDSubtitleFile"]:
            f'{basename_without_ext}.{r["SubLanguageID"]}.srt'
            for r in chunk
        }
        ost.download_subtitles(
            list(sub_ids),
            override_filenames=sub_ids,
            output_directory=dirname,
            extension="srt",
        )
示例#19
0
def descargar_sub(movieid, path, ost=None):
    """Download the first subtitle found for ``movieid`` in English, Spanish and French.

    Files go to ``path/movieid`` (created if missing) with the extension
    ``<lang>.srt``. When ``ost`` is None a new client is created and logged in
    with the 'doctest' account; otherwise the given client is used as is.
    Languages with no search result are skipped. Nothing is returned.
    """
    if ost is None:
        ost = OpenSubtitles()
        ost.login('doctest', 'doctest')

    directory = os.path.join(path, movieid)
    if not os.path.isdir(directory):
        os.makedirs(directory)

    for lang in ('eng', 'spa', 'fre'):
        criteria = {'sublanguageid': lang, 'imdbid': movieid}
        found = ost.search_subtitles([criteria])
        if not found or len(found) <= 0:
            continue

        first_id = found[0].get('IDSubtitleFile')
        ost.download_subtitles([first_id],
                               output_directory=directory,
                               extension=lang + '.srt')
示例#20
0
# `ost`, EMAIL and PASSW must already exist here; they are set up outside
# this excerpt.
token = ost.login(EMAIL, PASSW)

filename = input("Enter path to file: ")
sub_path = os.path.dirname(filename)
# Everything before the last dot, i.e. the path without its extension.
name = '.'.join(filename.split('.')[0:-1])

sname = name + '.srt'
zname = name + '.zip'

# Look the video up by hash and size.
f = File(filename)
hashh = f.get_hash()
size = f.size
print(hashh)
print(size)
data = ost.search_subtitles([{'sublanguageid': 'all', 'moviehash': hashh, 'moviebytesize': size}])
# The previous `assert type(data)!=None` could never fail because type()
# never returns None. Check the result itself before indexing into it.
if not data:
    raise SystemExit("No subtitles found for " + filename)

link = data[0]['ZipDownloadLink']
print(link)

# Request first so a failed download does not leave an empty archive behind,
# then stream the body to disk; the file is closed even if writing fails.
s = requests.get(link)
with open(os.path.join(sub_path, zname), 'wb') as file:
    for chunk in s.iter_content(100000):
        if chunk:
            file.write(chunk)


with ZipFile(os.path.join(sub_path,zname), 'r') as zip: 
示例#21
0
class OpenSubs:
    """Look up and download subtitles from OpenSubtitles by IMDb id."""

    def __init__(self, username, password):
        """Log in to OpenSubtitles and keep the session token.

        Arguments:
            - username (str): Your OpenSubtitles username
            - password (str): Your OpenSubtitles password

        Returns:
            - Nothing: Sets all parameters of the class.

        Raises:
            - AssertionError: if the login returns no token.
        """
        self.username = username
        self.password = password
        self.opensubs = OpenSubtitles()
        self.token = self.opensubs.login(self.username, self.password)
        assert self.token is not None, "Incorrect username/password (or something else went wrong when loging in...)"

    @staticmethod
    def _strip_tt(imdbID):
        """Return ``imdbID`` without a leading "tt" prefix."""
        if imdbID.startswith("tt"):
            return imdbID[2:]
        return imdbID

    def subtitlesInfo(self, imdbID, language="eng"):
        """Get the top OpenSubtitles search result for a movie.

        Arguments:
            - imdbID (str): IMDB ID of the movie (with or without the "tt" prefix)
            - language (str): ISO 639-2 Code for the language you want the subtitles in (English by default)
                              Check http://www.loc.gov/standards/iso639-2/php/code_list.php for other language codes

        Returns:
            If subtitles found:
                - subtitlesInfo (dict): The first search result; ``downloadSubtitles`` reads its
                  'SubDownloadLink' and 'SubFormat' entries.
            Otherwise:
                - None

        Raises:
            - AssertionError: if the search itself returns None.
        """
        searchParams = {
            "sublanguageid": language,
            "imdbid": str(self._strip_tt(imdbID)),
        }

        returnedDict = self.opensubs.search_subtitles([searchParams])
        assert returnedDict is not None, "OpenSubtitles returned nothing, check that you input the correct IMDBid and you input the correct username and password when instantiating this object."

        # An empty result list means "no subtitles for this movie".
        if not returnedDict:
            return None

        # Index 0 for top result
        return returnedDict[0]

    def downloadSubtitles(self, imdbID, language="eng", outputFolder=None):
        """Download whatever "subtitlesInfo(self,imdbID, language)" outputs into outputFolder

        Arguments:
            - imdbID (str): IMDB ID of the movie (with or without the "tt" prefix)
            - language (str): ISO 639-2 Code for the language you want the subtitles in (English by default)
                              Check http://www.loc.gov/standards/iso639-2/php/code_list.php for other language codes
            - outputFolder (str): FULL path to folder where you want to download subtitles.
                                  * If (outputFolder == None) then outputFolder = currentWorkingDirectory/subtitles,
                                    which is created when missing
                                  * If you specify outputFolder please make sure you're inputting the FULL (and not relative) path,
                                    as well as making sure the folder already exists

        Returns:
            If subtitles found:
                - Nothing: Downloads the file to outputFolder/tt<imdbID>.<SubFormat>, with ".gz" appended
                  when the download link ends in ".gz"
            Otherwise:
                If subtitles not available in OpenSubtitles.org return (int) -999
                If the download fails with an error that carries a ``code`` attribute (an HTTP error,
                most likely the daily download limit), print it and return that code

        Raises:
            - Any download error without a ``code`` attribute is printed and re-raised unchanged.
        """
        if outputFolder is None:
            outputFolder = os.getcwd() + "/subtitles"
            # Create subtitles folder if it doesn't exist
            try:
                os.makedirs(outputFolder)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise

        imdbID = self._strip_tt(imdbID)
        info = self.subtitlesInfo(imdbID, language)

        if not info:
            print("No subtitles found for", imdbID)
            return -999

        link = info['SubDownloadLink']
        filename = outputFolder + "/tt" + str(imdbID) + "." + info['SubFormat']
        # Keep the ".gz" suffix when the link points at a gzip file.
        if link.split(".")[-1] == "gz":
            filename += ".gz"

        try:
            urllib.request.urlretrieve(link, filename)
        except Exception as e:
            print(e)
            # Only some errors carry a status code; reading `.code` blindly
            # used to replace every other failure with an AttributeError.
            code = getattr(e, "code", None)
            if code is None:
                raise
            return code
示例#22
0
    k for k, v in movies_features_to_extract.items() if 'subtitle' in v
]
movies_to_extract
# -

# Loop through movie files, save their audio on folder prep
for movie_id in movies_to_extract:

    try:
        movie_file = movie_file_dict[movie_id]
        print(movie_id)

        f = File(str(movie_file))
        data = ost.search_subtitles([{
            'sublanguageid': 'eng',
            'imdbid': movie_id[2:],
            'moviehash': f.get_hash(),
            'moviebytesize': f.size
        }])
        id_subtitle_file = data[0].get('IDSubtitleFile')

        movie_prep_folder = movies_prep_path / f"{movie_id}"
        Path(movie_prep_folder).mkdir(parents=True, exist_ok=True)

        movie_subtitle_folder = movie_prep_folder / 'subtitle/'
        Path(movie_subtitle_folder).mkdir(parents=True, exist_ok=True)

        ost.download_subtitles([id_subtitle_file],
                               output_directory=movie_subtitle_folder,
                               extension='srt')
        time.sleep(1)
示例#23
0
class OpenSubtitlesManager():
    """
        Manages the OpenSubtitles connection and the search, ranking and
        fetching of subtitles for one media file.
    """
    def __init__(self,
                 opensubs_email,
                 opensubs_password,
                 path,
                 language="eng"):
        """
            Create the OpenSubtitles client and log in.
        :param opensubs_email: Email of OpenSubtitles account
        :param opensubs_password:  Password of OpenSubtitles account
        :param path: path to media file
        :param language: language of subtitles needed
        """
        self.path = path
        self.language = language
        self.opensubtitles = OpenSubtitles()
        self.token = self.opensubtitles.login(opensubs_email,
                                              opensubs_password)
        # Authenticated only when the login produced a usable token.
        self.is_auth = bool(self.token)

    def run(self):
        """
        Search, pick the best match and download it.
        :return: True after a download attempt, None when the search found nothing
        """
        subtitles_objects = self.search_subtitles()
        if not subtitles_objects:
            return None
        matched_sub = self.filter_results(subtitles_objects)
        self.download_subs(matched_sub)
        return True

    def search_subtitles(self):
        """
            Search subtitles for the media file by hash and size.
        :return: OpenSubtitles search data, or False when the file cannot be
                 read, its hash cannot be computed or the login failed
        """
        try:
            movie_file = File(self.path)
        except Exception:
            print("Error Calculating File Size")
            return False

        file_hash = movie_file.get_hash()
        # The hash value is checked for the text "Error" to detect a failed hash.
        if "Error" in file_hash:
            print("Error Calculating File Hash")
            return False

        if not self.is_auth:
            print("Error Connecting to OpenSubtitles")
            return False

        return self.opensubtitles.search_subtitles([{
            'sublanguageid': self.language,
            'moviehash': file_hash,
            'moviebytesize': movie_file.size
        }])

    def _subtitle_path(self, extension):
        """Return ``self.path`` with its file extension swapped for ``extension``."""
        # splitext only touches the final extension; str.replace would also
        # rewrite the same text elsewhere in the path.
        return os.path.splitext(self.path)[0] + extension

    # TODO: fix access denied error
    def download_subs(self, subtitles_object):
        """
            Download the subtitle zip, extract the subtitle file and copy it
            next to the media file under the media file's name.
        :param subtitles_object: an object received from the opensubtitles API search function
        :return: nothing; failures while saving or extracting are printed
        """
        # Local import because the module's import block is not visible here.
        import tempfile

        res = urllib2.urlopen(subtitles_object['ZipDownloadLink'])

        # find extension of subtitles file
        extension = subtitles_object['SubFileName'][
            subtitles_object['SubFileName'].rindex('.'):]

        # TEMP is normally set only on Windows; fall back to the platform default.
        temp_dir = os.getenv("TEMP") or tempfile.gettempdir()
        zippath = os.path.join(
            temp_dir,
            "temp_{}.zip".format(random.randint(10000, 99999)))

        try:
            with open(zippath, "wb") as z:
                z.write(res.read())

            # extract subtitles; the last matching archive member wins
            name = None
            with ZipFile(zippath, 'r') as fZip:
                for zipobj in fZip.filelist:
                    if extension in zipobj.filename:
                        name = zipobj.filename
                        fZip.extract(zipobj, path=temp_dir)

            if name is None:
                print("Error in Saving or Exracting files")
                return

            # copy file to movie folder and change name
            shutil.copyfile(os.path.join(temp_dir, name),
                            self._subtitle_path(extension))
        except Exception:
            print("Error in Saving or Exracting files")
        finally:
            res.close()

    def filter_results(self, subtitles_objects):
        """
            Pick the subtitle whose file name is most similar to the media file name.
        :param subtitles_objects: a non-empty list of subtitles objects received from
                                  the opensubtitles API search function
        :return: the first object with a similarity above 0.9, otherwise the
                 object with the highest score (the earliest one on a tie)
        """
        basename = os.path.basename(self.path)
        scores = []

        for sub in subtitles_objects:
            # using difflib to find how similar the name of the media file is to the subtitle file.
            ratio = difflib.SequenceMatcher(a=sub["SubFileName"].lower(),
                                            b=basename.lower()).ratio()

            # if ratio is more than 90% return
            if ratio > 0.9:
                return sub

            # checks if the type of movie is compatible. if so gives bonus points
            if compare_types(basename, sub["SubFileName"]):
                ratio += 0.1

            scores.append(ratio)

        # max() keeps the first of equal scores and needs no Python-2-only dict API.
        best_index = max(range(len(scores)), key=scores.__getitem__)
        return subtitles_objects[best_index]
示例#24
0

# Log in with the credentials held on `Data`; `fd` and `Data` are set up
# outside this excerpt.
token = fd.login(Data.username, Data.password)
if not token:
    # The message is Czech for "login error".
    print("Chyba prihlaseni")
    sys.exit(1)

print(token)

from pythonopensubtitles.utils import File
# Hash the video so it can be looked up by hash and size.
f = File(os.path.join(Data.path, Data.video))
h = f.get_hash()
print("Hash: %s" % h)
print("Size: %f" % f.size)

# Search for Czech ('cze') subtitles matching this hash and size.
data = fd.search_subtitles([{'sublanguageid': 'cze', 'moviehash': h, 'moviebytesize': f.size}])

import urllib2
from StringIO import StringIO
import gzip

# Download every search hit and unpack its gzip stream in memory. All hits
# are written to the same 'titles.srt', so only the last one remains on disk.
for item in data:
    print(item['SubDownloadLink'])
    request = urllib2.Request(item['SubDownloadLink'])
    response = urllib2.urlopen(request)
    try:
        buf = StringIO(response.read())
    finally:
        # Release the connection even when reading fails.
        response.close()

    # NOTE: this rebinds `data` while the loop iterates the original list.
    # The name is kept as it was so module-level state stays the same.
    data = gzip.GzipFile(fileobj=buf).read()
    with open('titles.srt', 'wb') as out:
        out.write(data)
示例#25
0
class SubtitleFinder:
    """Find, download and optionally re-sync English subtitles for a media file."""

    def __init__(self, skip_subsync, ost_username, ost_password, tmdb_key):
        """Log in to OpenSubtitles and prepare the optional helpers.

        ``tmdb_key`` enables the ``MediaSearcher`` lookup; without it
        ``media_searcher`` is None. The subsync argument parser is only built
        when ``skip_subsync`` is false. Exceptions raised by the login are
        logged and re-raised.
        """
        self.skip_subsync = skip_subsync
        self.ost = OpenSubtitles()
        try:
            self.ost.login(ost_username, ost_password)
        except Exception:
            logging.error("Failed to log into opensubtitles.org.")
            raise
        self.ost_language = 'eng'
        # Always define the attribute: find_and_download() reads it even when
        # no TMDB key was given.
        self.media_searcher = MediaSearcher(tmdb_key) if tmdb_key else None
        if not self.skip_subsync:
            self.subsync_parser = ffsubsync.make_parser()

    def find_and_download(self, source, imdb_id):
        """Return subtitles for ``source``, or None when nothing is found.

        A hash match is returned as downloaded. Otherwise the search falls
        back to the IMDb id (given, or looked up through ``media_searcher``),
        then to the parsed title, and the result is synced against the video
        unless ``skip_subsync`` is set.
        """
        # subtitles matching on hash are already synced
        subs_data = self.find_subtitles_by_hash(source)
        if subs_data:
            return self.download_subtitles(subs_data)

        parsed_media = filename_parser.parse(source)
        if imdb_id is None and self.media_searcher is not None:
            imdb_id = self.media_searcher.search(parsed_media)

        if imdb_id is not None:
            subs_data = self.find_subtitles_by_id(imdb_id)
        elif isinstance(parsed_media, ParsedMovie):
            subs_data = self.find_subtitles_for_movie(parsed_media.title)
        elif isinstance(parsed_media, ParsedSeries):
            subs_data = self.find_subtitles_for_episode(parsed_media.title, parsed_media.season, parsed_media.episode)

        # An empty result list is "nothing found" too; download_subtitles()
        # indexes the first entry and would fail on it.
        if not subs_data:
            return None

        # sync subs unless explicitly asked not to
        subs = self.download_subtitles(subs_data)
        if self.skip_subsync:
            return subs
        return self.sync_subtitles(source, subs)

    def find_subtitles_by_hash(self, source):
        """Search by the hash and byte size of the file at ``source``."""
        f = File(source)
        return self.find_subtitles(moviehash=f.get_hash(), moviebytesize=f.size)

    def find_subtitles_by_id(self, imdb_id):
        """Search by IMDb id (with or without the "tt" prefix)."""
        return self.find_subtitles(imdbid=imdb_id)

    def find_subtitles_for_movie(self, title):
        """Search by movie title."""
        return self.find_subtitles(query=title)

    def find_subtitles_for_episode(self, title, season, episode):
        """Search by series title, season and episode."""
        return self.find_subtitles(query=title, season=season, episode=episode)

    def find_subtitles(self, **request):
        """Run one search with ``request`` as criteria plus the configured language.

        A leading "tt" is removed from ``imdbid`` before the request is sent.
        """
        request.update(sublanguageid=self.ost_language)
        if 'imdbid' in request and request['imdbid'].startswith('tt'):
            request.update(imdbid=request['imdbid'][2:])
        return self.ost.search_subtitles([request])

    def download_subtitles(self, subs_data):
        """Download the first entry of ``subs_data`` and parse it into an ``SSAFile``."""
        id_subtitle_file = subs_data[0].get('IDSubtitleFile')
        subs_dict = self.ost.download_subtitles([id_subtitle_file], return_decoded_data=True)
        raw_subs = subs_dict.get(id_subtitle_file)
        return pysubs2.SSAFile.from_string(raw_subs)

    def sync_subtitles(self, video_filename, subtitles):
        """Re-time ``subtitles`` against ``video_filename`` and return the synced result."""
        # Local import because the module's import block is not visible here.
        import os

        # Both intermediate files live in one scratch directory that is removed
        # on exit, so no temporary .srt files are left behind. The synced file
        # is loaded before the directory goes away.
        with tempfile.TemporaryDirectory() as workdir:
            unsynced_path = os.path.join(workdir, 'unsynced.srt')
            synced_path = os.path.join(workdir, 'synced.srt')
            with open(unsynced_path, 'wb') as tmp_unsynced:
                tmp_unsynced.write(subtitles.to_string('srt').encode())
            self.run_subsync(video_filename, unsynced_path, synced_path)
            return pysubs2.load(synced_path)

    def run_subsync(self, reference, srtin, srtout):
        """Run ffsubsync with ``reference`` as the video, reading ``srtin`` and writing ``srtout``."""
        ffsubsync.run(self.subsync_parser.parse_args([
            reference,
            '-i', srtin,
            '-o', srtout
        ]))
movies_list = []
# Number of videos for which OpenSubtitles returned no hash match.
movies_without_reference_sub_count = 0

for file_idx, file_path in enumerate(video_paths):
    f = OstFile(file_path)
    file_hash = f.get_hash()

    print(file=sys.stderr)
    print('-------------------------------------------------------',
          file=sys.stderr)
    print('Movie `%s` with hash `%s`:' % (file_path, file_hash),
          file=sys.stderr)

    subtitle_files = ost.search_subtitles([{
        'moviehash': file_hash,
        'sublanguageid': 'eng'
    }])
    # The search may give None as well as an empty list (other callers in this
    # file check for both); either way there is no reference subtitle.
    if not subtitle_files:
        file_basename = os.path.splitext(os.path.basename(file_path))[0]
        print('Video file `%s` not registered on OpenSubtitles' % file_path,
              file=sys.stderr)
        movies_without_reference_sub_count += 1

        continue

    # The first hit is used as the reference subtitle.
    correct_subtitle_file = subtitle_files[0]

    movie_name = correct_subtitle_file['MovieName']

    movie_name_normalized = to_normalized_name(movie_name)
    print('moviename is `%s`' % movie_name, file=sys.stderr)
示例#27
0
    if download_count >= DOWNLOAD_LIMIT:
        log("Download limit of {} reached.".format(DOWNLOAD_LIMIT))
        break
    imdb_id = movie['id']
    if os.path.exists("./subtitles/{}.srt".format(imdb_id)):
        # print("Subtitles for this movie {} have already been downloaded. Skipping...".format(movie['title']))
        continue

    if imdb_id in unavailable_ids:  # we already know that this id is unavailable in OpenSubtitles. Skip...
        continue

    log("=== {} ({}) ratings:{} ===".format(movie['title'], imdb_id,
                                            movie['numOfRatings']))

    subtitles = ost.search_subtitles([{
        'sublanguageid': 'eng',
        'imdbid': imdb_id[2:]
    }])
    if subtitles is None or len(subtitles) == 0:  # not found. Log and Skip...
        log_unavailable(imdb_id)
        time.sleep(SLEEP_TIME)
        continue

    subtitles = sorted(subtitles,
                       key=lambda i: int(i['SubDownloadsCnt']),
                       reverse=True)
    id_subtitle_file = subtitles[0].get('IDSubtitleFile')
    override_filenames[id_subtitle_file] = "{}.srt".format(imdb_id)

    log("Top subtitles file ID {} : {} downloads".format(
        id_subtitle_file, subtitles[0].get('SubDownloadsCnt')))
    try:
# Split every entry of `movie` on tabs, then drop the first and last rows.
m = [i.split('\t') for i in movie][1:-1]
m

# In[3]:

h = []
# One client and one login serve every query; creating a client and logging
# in again for each row only adds round trips.
ost = OpenSubtitles()
token = ost.login("doctest", 'doctest')
for i in m:
    # `or []` keeps the loop below safe when the search gives None.
    data = ost.search_subtitles([{
        'query': i[0],
        'sublanguageid': 'zht',
    }]) or []
    highest = 0
    highest_movie = 0
    # Remember the index of the result with the most downloads; stays 0 when
    # there are no results or no result has a positive count.
    for index, value in enumerate(data):
        downloads = int(value['SubDownloadsCnt'])
        if downloads > highest:
            highest = downloads
            highest_movie = index
    print(highest_movie)
    h.append(highest_movie)
# h now holds, per row of m, the index of the most-downloaded search result.

# In[5]:

# Manual override for row 88. Original note: "index2 indicate a wrong movie".
# NOTE(review): presumably the automatically chosen index pointed at the wrong
# film -- confirm.
h[88] = 0
示例#29
0
class TestOpenSubtitles(unittest.TestCase):
    """Run the OpenSubtitles client against a ``MockServerProxy`` installed as its ``xmlrpc``."""

    def setUp(self):
        self.mock = MockServerProxy()
        self.ost = OpenSubtitles()
        self.ost.xmlrpc = self.mock

    @staticmethod
    def _respond(status='200 OK', **fields):
        """Build a fake remote method that ignores its arguments and returns a fresh reply dict."""
        def handler(*_):
            reply = {'status': status}
            reply.update(fields)
            return reply
        return handler

    def _check_flag_call(self, rpc_name, failing_status, invoke):
        """Assert ``invoke()`` is falsy for ``failing_status`` and truthy for '200 OK'."""
        setattr(self.mock, rpc_name, self._respond(failing_status))
        assert not invoke()

        setattr(self.mock, rpc_name, self._respond())
        assert invoke()

    def test_login(self):
        self.mock.LogIn = self._respond('403')
        assert self.ost.login('*****@*****.**', 'goodpassword') is None

        self.mock.LogIn = self._respond(token='token')
        assert self.ost.login('*****@*****.**', 'goodpassword') == 'token'

    def test_search_subtitles(self):
        self.mock.SearchSubtitles = lambda *_: fixture('search_subtitles')

        found = self.ost.search_subtitles([])

        assert len(found) == 1
        first = found[0]
        assert first.get('IDSubtitle') == '7783633'
        assert first.get('IDSubtitleFile') == '1956355942'
        assert first.get('IDSubMovieFile') == '19353776'

    def test_search_imdb(self):
        self.mock.SearchMoviesOnIMDB = self._respond(
            data=[{'IDMovieImdb': 'id'}])

        # TODO: not sure if these are the right params. The test is kept
        # because it was on the README.
        found = self.ost.search_movies_on_imdb([])
        assert found[0].get('IDMovieImdb') == 'id'

    def test_no_operation(self):
        self.mock.NoOperation = self._respond()
        assert self.ost.no_operation()

    def test_logout(self):
        self._check_flag_call('LogOut', '403', lambda: self.ost.logout())

    def test_auto_update(self):
        self.mock.AutoUpdate = self._respond(version='something')
        reply = self.ost.auto_update('SubDownloader')
        assert 'version' in reply.keys()

    def test_already_exists(self):
        self.mock.TryUploadSubtitles = self._respond('403')
        # TODO: The error here is unauthorized and not that the subtitle wasn't found,
        # however, for not breaking compatibility we will keep it this way for now.
        assert not self.ost.try_upload_subtitles([])

        self.mock.TryUploadSubtitles = self._respond(alreadyindb=1)
        assert self.ost.try_upload_subtitles([])

    def test_upload_subtitles(self):
        self.mock.UploadSubtitles = self._respond(
            data={'url': 'http://example.com'})
        reply = self.ost.upload_subtitles([])
        assert 'url' in reply.keys()

    def test_check_subtitle_hash(self):
        self.mock.CheckSubHash = self._respond(data={})
        reply = self.ost.check_subtitle_hash([])
        assert reply == {}

    def test_check_movie_hash(self):
        self.mock.CheckMovieHash = self._respond(data={})
        reply = self.ost.check_movie_hash([])
        assert reply == {}

    def test_insert_movie_hash(self):
        self.mock.InsertMovieHash = self._respond(data={})
        reply = self.ost.insert_movie_hash([])
        assert reply == {}

    def test_report_wrong_movie_hash(self):
        self._check_flag_call(
            'ReportWrongMovieHash', '419',
            lambda: self.ost.report_wrong_movie_hash([]))

    def test_report_wrong_movie_hash_404(self):
        self._check_flag_call(
            'ReportWrongMovieHash', '404',
            lambda: self.ost.report_wrong_movie_hash('hash'))

    def test_get_subtitle_languages(self):
        self.mock.GetSubLanguages = self._respond(data={})
        assert self.ost.get_subtitle_languages() == {}

    def test_get_available_translations(self):
        self.mock.GetAvailableTranslations = self._respond(data={})
        assert self.ost.get_available_translations('SubDownloader') == {}

    def test_subtitles_votes(self):
        self.mock.SubtitlesVote = self._respond(data={})
        assert self.ost.subtitles_votes({}) == {}

    def test_get_comments(self):
        self.mock.GetComments = self._respond(data={})
        assert self.ost.get_comments([]) == {}

    def test_add_comment(self):
        self._check_flag_call('AddComment', '403',
                              lambda: self.ost.add_comment({}))

    def test_add_request(self):
        self.mock.AddRequest = self._respond(data={})
        assert self.ost.add_request({}) == {}

    def test_download_subtitles(self):
        self.mock.DownloadSubtitles = lambda *_: fixture('download_subtitles')
        with tempfile.TemporaryDirectory() as tmpdirname:
            result = self.ost.download_subtitles(['id'],
                                                 output_directory=tmpdirname)

        assert result, result

    @mock.patch('pythonopensubtitles.opensubtitles.decompress',
                return_value='test_decoded_data')
    def test_download_subtitles_force_encoding(self, mock_decompress):
        self.mock.DownloadSubtitles = lambda *_: fixture('download_subtitles')
        with tempfile.TemporaryDirectory() as tmpdirname:
            result = self.ost.download_subtitles(['id'],
                                                 output_directory=tmpdirname,
                                                 encoding='test_encoding')
            # The requested encoding must reach decompress() with the first payload.
            encoded_data = self.ost._get_from_data_or_none('data')
            mock_decompress.assert_called_with(encoded_data[0]['data'],
                                               encoding='test_encoding')
        assert result, result
示例#30
0
from pythonopensubtitles.opensubtitles import OpenSubtitles
import requests, zipfile, io
ost = OpenSubtitles()
token = ost.login("doctest", 'doctest')
data = ost.search_subtitles([{'query': 'cape no.7', 'sublanguageid': 'zht'}])
# Stop with a readable message instead of failing on data[0] when the search
# gives None or an empty list.
if not data:
    raise SystemExit("No subtitles found for 'cape no.7'")
zip_file_url = data[0]['ZipDownloadLink']
r = requests.get(zip_file_url)
# Surface HTTP errors here rather than as a confusing bad-zip error below.
r.raise_for_status()
# Unpack the archive straight from memory; the archive is closed afterwards.
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
    z.extractall("/Users/sate/Desktop")
示例#31
0
def download(path, dir_mode=False):
    """Search OpenSubtitles for ``path`` and download the best-matching subtitle.

    The video is looked up by movie hash and byte size (``'eng'`` subtitles).
    Among the first results, the one whose ``MovieName`` is most similar to
    the file name (dots replaced by spaces) is selected; a similarity below
    0.5 is reported as "not found".  If an ``.srt`` with the same stem already
    exists next to the video, the user is asked how to resolve the conflict,
    unless the module-level ``keep_all`` flag is already set.

    :param path: Path of the video file.
    :param dir_mode: When true, the leading blank line and the closing
        separator line are not printed.
    :returns: ``None``; progress and errors are printed to the console.
    """
    separator = colored(
        '=============================================================================',
        'yellow')
    file_name = os.path.basename(path)
    target_dir = os.path.dirname(path)
    stem = Path(file_name).stem

    if not dir_mode:
        print("")

    print(separator)

    ost = OpenSubtitles()
    ost.login('subspy', 'subspy')

    f = File(path)

    data = ost.search_subtitles([{
        'sublanguageid': 'eng',
        'moviehash': f.get_hash(),
        'moviebytesize': f.size
    }])

    if not data:
        print(colored("Subtitles could not be found for " + file_name, 'red'))
        return

    best_match = {"index": 0, "ratio": 0}
    # Loop-invariant: the file name with dots turned into spaces.
    comparable_name = file_name.replace(".", " ")

    for current_index, search_result in enumerate(data):
        # A result without a MovieName is compared as an empty string instead
        # of passing None to SequenceMatcher (which raises TypeError).
        current_ratio = SequenceMatcher(
            None, search_result.get('MovieName') or '',
            comparable_name).ratio()

        if current_ratio > best_match['ratio']:
            best_match['index'] = current_index
            best_match['ratio'] = current_ratio

        # Only the first results are scored; the check runs after scoring,
        # so indices 0..11 are considered.
        if current_index > 10:
            break

    if best_match['ratio'] < 0.5:
        print(colored("Subtitles could not be found for " + file_name, 'red'))
        return

    best = data[best_match['index']]

    # Display fields may be missing from a result; fall back to '?' rather
    # than concatenating None.
    print("[ TITLE  ] " + colored(
        "{} ({})".format(best.get('MovieName') or '?',
                         best.get('MovieYear') or '?'), 'cyan'))
    print("[ RATING ] " + colored(
        "{}/10 on IMDb".format(best.get('MovieImdbRating') or '?'), 'cyan'))

    confidence = round(best_match['ratio'] * 100, 1)

    print("Matched with " + str(confidence) + '% confidence')

    id_subtitle_file = best.get('IDSubtitleFile')

    existing_subtitle = os.path.join(target_dir, stem + ".srt")

    abort_flag = False
    suffix = ""

    if os.path.isfile(existing_subtitle):
        suffix = ".SubsPY"
        print(colored("\nSubtitles already exist for this file.", 'red'))

        # .get() so a module that never defined ``keep_all`` behaves as if the
        # flag were False instead of raising KeyError.
        if globals().get('keep_all', False):
            print(
                colored("Keeping both subtitles (added suffix) for ALL FILES.",
                        "cyan"))
        else:
            choice = input(
                colored(
                    'Overwrite [o], keep existing [k], keep both [b] or keep both for all conflicts [a]? : ',
                    'magenta')).lower()
            if choice == "o":
                suffix = ""
                print(colored("Overwriting existing Subtitles", "cyan"))
            elif choice == "k":
                print(colored("Skipping this download.", "cyan"))
                abort_flag = True
            elif choice == "a":
                # Remember the decision for subsequent calls.
                globals()['keep_all'] = True
                print(
                    colored(
                        "Keeping both subtitles (added suffix) for ALL FILES.",
                        "cyan"))
            else:
                # Any other answer (including 'b') keeps both files.
                print(
                    colored(
                        "Keeping both subtitles (added suffix) for this file.",
                        "cyan"))

    if not abort_flag:
        overrides = {id_subtitle_file: stem + suffix + ".srt"}
        status = ost.download_subtitles([id_subtitle_file],
                                        override_filenames=overrides,
                                        output_directory=target_dir,
                                        extension='srt')

        if status is None:
            # NOTE(review): input() (not print) blocks until Enter is pressed;
            # presumably an intentional pause on failure - confirm.
            input(
                colored(
                    "\nSubtitles could not be downloaded for " + file_name,
                    'red'))
            return

        print(colored("\nSubtitles downloaded successfully!", "green"))

    if not dir_mode:
        print(separator)
示例#32
0
def download_all_subtitles(filepath, skip=()):
    """Download subtitles for ``filepath`` in every configured language.

    For each language in ``settings.SUBTITLE_LANGUAGES`` that is not in
    ``skip`` the file is searched by movie hash and by file name.  File-name
    results are only used for languages the hash search did not cover.  The
    subtitles are downloaded in chunks of 10 next to the video as
    ``<name>.<ISO639>.srt`` with a 10 second pause between chunks, and each
    downloaded file is then passed through the external ``alass`` command
    (presumably to re-synchronise it with the video - confirm).

    :param filepath: Path of the video file.
    :param skip: Language codes (same form as ``settings.SUBTITLE_LANGUAGES``)
        to leave out.
    :raises ProtocolError: If a chunk still fails after 5 retries.
    """
    # Local import: only this function needs it, and the file's top-level
    # import block is not touched.
    import subprocess

    dirname = os.path.dirname(filepath)
    basename_without_ext = os.path.splitext(os.path.basename(filepath))[0]

    def _new_session():
        """Return a client logged in with the configured credentials."""
        session = OpenSubtitles()
        session.login(settings.OPENSUBTITLES_USERNAME,
                      settings.OPENSUBTITLES_PASSWORD)
        return session

    ost = _new_session()
    movie_hash = File(filepath).get_hash()
    language_ids = [
        languages.get(part1=lang).part2b
        for lang in settings.SUBTITLE_LANGUAGES if lang not in skip
    ]

    def _search(criteria):
        """Run one search per language and flatten the results."""
        found = []
        for langid in language_ids:
            found.extend(
                ost.search_subtitles([{"sublanguageid": langid, **criteria}])
                or [])
        return found

    results_from_hash = _search({"moviehash": movie_hash})
    languages_from_hash = {r.get("SubLanguageID") for r in results_from_hash}
    results_from_filename = _search({"query": basename_without_ext})
    results = results_from_hash + [
        r for r in results_from_filename if r.get("SubLanguageID")
        and r.get("SubLanguageID") not in languages_from_hash
    ]
    results = [
        r for r in results if r["ISO639"] in settings.SUBTITLE_LANGUAGES
    ]

    sub_filenames = set()
    for chunk_index, chunk in enumerate(_chunks(results, 10)):
        sub_ids = {
            r["IDSubtitleFile"]: f'{basename_without_ext}.{r["ISO639"]}.srt'
            for r in chunk
        }
        sub_filenames.update(sub_ids.values())

        # Pause between chunks (not before the first one).
        if chunk_index > 0:
            time.sleep(10)
        if not sub_ids:
            continue

        # One initial attempt plus up to 5 retries.
        for retries_left in range(5, -1, -1):
            try:
                ost.download_subtitles(
                    list(sub_ids),
                    override_filenames=sub_ids,
                    output_directory=dirname,
                    extension="srt",
                )
                break
            except ProtocolError:
                if retries_left == 0:
                    raise
                time.sleep(10)
                # Reconnect with the same credentials as the first login
                # (not with empty ones).
                ost = _new_session()

    for sub_filename in sub_filenames:
        tmp_path = os.path.join(dirname, "fixed_" + sub_filename)
        output_path = os.path.join(dirname, sub_filename)
        # Argument list instead of a shell string: paths containing quotes or
        # other shell metacharacters are passed through verbatim.
        try:
            subprocess.run(
                ["timeout", "5m", "alass", filepath, output_path, tmp_path])
        except OSError:
            # Tool not available: keep the subtitle as downloaded.
            continue
        # Only replace the download when the tool actually produced a file.
        if os.path.exists(tmp_path):
            os.replace(tmp_path, output_path)
示例#33
0
# Example script (Python 2: urllib2/StringIO): log in, look up Czech ('cze')
# subtitles for one video by hash and size, then fetch and gunzip every hit.
token = fd.login(Data.username, Data.password)
if not token:
    print("Chyba prihlaseni")
    sys.exit(1)

print(token)

from pythonopensubtitles.utils import File
f = File(os.path.join(Data.path, Data.video))
h = f.get_hash()
print("Hash: %s" % h)
print("Size: %f" % f.size)

data = fd.search_subtitles([{
    'sublanguageid': 'cze',
    'moviehash': h,
    'moviebytesize': f.size
}])

import urllib2
from StringIO import StringIO
import gzip

# Guard against a None result so an unsuccessful search simply yields no
# downloads instead of a TypeError.
for item in data or []:
    print(item['SubDownloadLink'])
    request = urllib2.Request(item['SubDownloadLink'])
    response = urllib2.urlopen(request)
    try:
        buf = StringIO(response.read())
    finally:
        # Release the connection even if reading fails.
        response.close()

    # Keep the decompressed text under its own name so the ``data`` result
    # list being iterated is not clobbered.
    subtitle_content = gzip.GzipFile(fileobj=buf).read()
示例#34
0
class MediaSubtitlesPlugin(Plugin):
    """
    Plugin to get video subtitles from OpenSubtitles

    Requires:

        * **python-opensubtitles** (``pip install -e 'git+https://github.com/agonzalezro/python-opensubtitles#egg=python-opensubtitles'``)
        * **webvtt** (``pip install webvtt-py``), optional, to convert srt subtitles into vtt format ready for web streaming
        * **requests** (``pip install requests``)
    """
    def __init__(self, username, password, language=None, **kwargs):
        """
        :param username: Your OpenSubtitles username
        :type username: str

        :param password: Your OpenSubtitles password
        :type password: str

        :param language: Preferred language name, ISO639 code or OpenSubtitles
            language ID to be used for the subtitles. Also supports an (ordered)
            list of preferred languages
        :type language: str or list[str]

        :raises AttributeError: If ``language`` is neither a string nor a list
        """

        from pythonopensubtitles.opensubtitles import OpenSubtitles

        super().__init__(**kwargs)

        self._ost = OpenSubtitles()
        self._token = self._ost.login(username, password)
        # Preferred languages, stored lower-cased.
        self.languages = []
        # Held while converting subtitle files in to_vtt.
        self._file_lock = threading.RLock()

        if language:
            if isinstance(language, str):
                self.languages.append(language.lower())
            elif isinstance(language, list):
                self.languages.extend([lang.lower() for lang in language])
            else:
                raise AttributeError(
                    '{} is neither a string nor a list'.format(language))

    @action
    def get_subtitles(self, resource, language=None):
        """
        Get the subtitles data for a video resource

        :param resource: Media file, torrent or URL to the media resource
        :type resource: str

        :param language: Language name or code (default: configured preferred language).
            Choose 'all' for all the languages
        :type language: str

        :returns: The local ``.srt``/``.vtt`` entries found next to the file
            followed by the matching OpenSubtitles results; ``(None, <error
            message>)`` if ``resource`` is not an existing file.
        """

        from pythonopensubtitles.utils import File

        if resource.startswith('file://'):
            resource = resource[len('file://'):]

        resource = os.path.abspath(os.path.expanduser(resource))
        if not os.path.isfile(resource):
            return None, '{} is not a valid file'.format(resource)

        file_name = os.path.basename(resource)
        media_dir = os.path.dirname(resource)
        languages = [language.lower()] if language else self.languages

        cwd = os.getcwd()
        os.chdir(media_dir)

        # Everything that runs inside media_dir is covered by the finally
        # clause, so the working directory is restored on any failure.
        try:
            local_subs = [{
                'IsLocal': True,
                'MovieName': '[Local subtitle]',
                'SubFileName': sub.split(os.sep)[-1],
                'SubDownloadLink': 'file://' + os.path.join(media_dir, sub),
            } for sub in find_files_by_ext(media_dir, '.srt', '.vtt')]

            self.logger.info('Found {} local subtitles for {}'.format(
                len(local_subs), file_name))

            file_hash = File(file_name).get_hash()
            # Treat an unsuccessful search (None) as "no remote results".
            subs = self._ost.search_subtitles([{
                'sublanguageid': 'all',
                'moviehash': file_hash,
            }]) or []

            # Keep everything when no language filter applies; otherwise
            # match on language name, OpenSubtitles ID or ISO639 code.
            subs = [
                sub for sub in subs if not languages or languages[0] == 'all'
                or sub.get('LanguageName', '').lower() in languages
                or sub.get('SubLanguageID', '').lower() in languages
                or sub.get('ISO639', '').lower() in languages
            ]

            for sub in subs:
                sub['IsLocal'] = False

            self.logger.info('Found {} OpenSubtitles items for {}'.format(
                len(subs), file_name))

            return local_subs + subs
        finally:
            os.chdir(cwd)

    @action
    def download(self,
                 link,
                 media_resource=None,
                 path=None,
                 convert_to_vtt=False):
        """
        Downloads a subtitle link (.srt/.vtt file or gzip/zip OpenSubtitles archive link) to the specified directory

        :param link: Local subtitles file or OpenSubtitles gzip download link
        :type link: str

        :param path: Path where the subtitle file will be downloaded (default: temporary file under /tmp)
        :type path: str

        :param media_resource: Name of the media resource. If set and if it's a
            media local file then the subtitles will be saved in the same folder
        :type media_resource: str

        :param convert_to_vtt: If set to True, then the downloaded subtitles
            will be converted to VTT format (default: no conversion)
        :type convert_to_vtt: bool

        :returns: dict.

        Format::

            {
                "filename": "/path/to/subtitle/file.srt"
            }

        """

        if link.startswith('file://'):
            link = link[len('file://'):]
        if os.path.isfile(link):
            # Already a local file: nothing to download.
            if convert_to_vtt:
                link = self.to_vtt(link).output
            return {'filename': link}

        gzip_content = requests.get(link).content
        # Decompress before touching the target, so a bad payload never
        # truncates or removes an existing file at ``path``.
        content = gzip.decompress(gzip_content)

        if not path and media_resource:
            if media_resource.startswith('file://'):
                media_resource = media_resource[len('file://'):]
            if os.path.isfile(media_resource):
                media_resource = os.path.abspath(media_resource)
                # splitext also handles names without an extension and
                # directories whose name contains a dot.
                path = os.path.splitext(media_resource)[0] + '.srt'

        if path:
            f = open(path, 'wb')
        else:
            f = tempfile.NamedTemporaryFile(prefix='media_subs_',
                                            suffix='.srt',
                                            delete=False)
            path = f.name

        try:
            with f:
                f.write(content)
            if convert_to_vtt:
                path = self.to_vtt(path).output
        except Exception:
            # Do not leave a partial file behind.
            os.unlink(path)
            raise

        return {'filename': path}

    @action
    def to_vtt(self, filename):
        """
        Get the path of the VTT version of a subtitles file. Returns the
        original path if the file is already in VTT format, otherwise the
        file is converted from SRT and the path of the ``.vtt`` file is
        returned.

        :param filename: Path of the ``.srt`` or ``.vtt`` file
        :type filename: str
        """

        if filename.lower().endswith('.vtt'):
            return filename

        import webvtt

        with self._file_lock:
            try:
                # Parses cleanly as VTT despite the extension: use as is.
                webvtt.read(filename)
                return filename
            except Exception:
                webvtt.from_srt(filename).save()
                # NOTE(review): assumes save() writes next to the source with
                # the extension swapped for .vtt - confirm against webvtt-py.
                return os.path.splitext(filename)[0] + '.vtt'
class OpenSubtitlesManager:
    """Download subtitle files from OpenSubtitles and extract their text lines.

    Credentials are read from the ``OST_USERNAME`` / ``OST_PASSWORD``
    environment variables when the class is defined.  Files are stored under
    ``BASE_DIR/<dist>`` as ``<subtitle id>.<format>``.
    """

    USERNAME = os.getenv('OST_USERNAME')
    PASSWORD = os.getenv('OST_PASSWORD')

    def __init__(self, lang=OST_LANG.en, dist='.data'):
        """
        :param lang: Language used for searches (passed as ``str(lang)``).
        :param dist: Download directory, relative to ``BASE_DIR``.
        """
        self.lang = lang
        self.dist = os.path.join(BASE_DIR, dist)
        self.client = OpenSubtitles()

    def login(self):
        """Log in with the class credentials and return the session token."""
        token = self.client.login(self.USERNAME, self.PASSWORD)
        assert isinstance(token, str)
        return token

    def search(self, query='matrix'):
        """Return ``(IDSubtitleFile, SubFormat)`` pairs for ``query``.

        An empty list is returned when the search yields ``None``.
        """
        data = self.client.search_subtitles(
            [dict(sublanguageid=str(self.lang), query=query)])
        if data is None:
            return []
        return [(d['IDSubtitleFile'], d['SubFormat']) for d in data]

    def ids_filter(self, subtitle_ids, chunk):
        """Group ids by format and split each group into lists of ``chunk``.

        :param subtitle_ids: Iterable of ``(subtitle id, format)`` pairs.
        :param chunk: Maximum number of ids per inner list.
        :returns: ``dict.items()`` view of ``format -> [[id, ...], ...]``.
        """
        def zipper(ids):
            # zip_longest over `chunk` references to ONE iterator slices the
            # ids into groups of `chunk`, padding the last group with None;
            # the padding is dropped again below.
            groups = itertools.zip_longest(*[iter(ids)] * chunk)
            return [[sid for sid in group if sid is not None]
                    for group in groups]

        by_extension = {}
        for sid, ext in subtitle_ids:
            by_extension.setdefault(ext, []).append(sid)
        return {ext: zipper(ids) for ext, ids in by_extension.items()}.items()

    def check_exists(self, subtitle_ids, ext):
        """Return the ids whose ``<id>.<ext>`` file is NOT yet in ``dist``."""
        return [
            sid for sid in subtitle_ids if not os.path.isfile(
                os.path.join(self.dist, '{}.{}'.format(sid, ext)))
        ]

    def download_subtitles(self, chunk=5):
        """Download every search result that is not already on disk.

        :param chunk: Number of subtitle ids requested per download call.
        """
        if not os.path.exists(self.dist):
            os.makedirs(self.dist)
        self.login()
        subtitle_ids = self.search()
        grouped = self.ids_filter(subtitle_ids, chunk)
        for ext, chunked in tqdm(grouped):
            for ids in chunked:
                missing = self.check_exists(ids, ext)
                if missing:
                    self.client.download_subtitles(missing,
                                                   output_directory=self.dist,
                                                   extension=ext)

    def reglob(self, path, exp, invert=False):
        """List entries of ``path`` whose name matches the regex ``exp``.

        With ``invert`` set, the entries that do NOT match are returned.
        The returned names are joined with ``path``.
        """
        m = re.compile(exp)
        if invert is False:
            res = [f for f in os.listdir(path) if m.search(f)]
        else:
            res = [f for f in os.listdir(path) if not m.search(f)]
        return [os.path.join(path, f) for f in res]

    def readfile(self, fpath):
        """Return ``(lines, extension)`` for the text file at ``fpath``."""
        ext = fpath.split('.')[-1]
        with open(fpath, 'r') as f:
            content = f.readlines()
        return content, ext

    def should_ignore(self, line, patterns, invert=False):
        """Tell whether ``line`` should be dropped.

        Empty lines and ``'&nbsp'`` are always dropped.  Otherwise the line
        is dropped as soon as one pattern matches (or, with ``invert``, as
        soon as one pattern does not match).
        """
        if line == '' or line == '&nbsp':
            return True
        for pattern in patterns:
            matched = re.search(pattern, line) is not None
            if invert and not matched:
                return True
            if not invert and matched:
                return True
        return False

    def remove_strings(self, line, patterns):
        """Reduce ``line`` to group 1 of each pattern that matches, in order."""
        result = line
        for pattern in patterns:
            m = re.search(pattern, result)
            if m is not None:
                result = m.group(1)
        return result

    def parse_srt(self, content):
        """Extract text lines from SRT content.

        Timing lines and numeric counter lines are skipped, a surrounding
        ``<tag>...</tag>`` pair is stripped, and empty lines are dropped.
        """
        ignores = [r'^[0-9:,]+ --> [0-9:,]+$', r'^[0-9]+$']
        removes = [r'\<[a-z]+\>(.*)\<\/[a-z]+\>']
        data = [
            self.remove_strings(line.replace('\n', ''), removes)
            for line in content if not self.should_ignore(line, ignores)
        ]
        return [line for line in data if line != '']

    def parse_sub(self, content):
        """Extract text lines from ``{start}{end}text`` (.sub) content."""
        removes = [r'{[0-9]+}{[0-9]+}(.*)']
        data = [
            # before_remove is a method of this class; it must be called
            # through self.
            self.remove_strings(
                self.before_remove(line, ['\n', '</i>']), removes)
            for line in content
        ]
        return [line for line in data if line != '']

    # The SSA parser below was cut because it is too complicated.

    # def parse_ssa(self, content):
    #     ignores = [r'^Dialogue: (.*)']
    #     removes = [r'^Dialogue: (.*)']
    #     idx = content.index('[Events]')
    #     data = [self.remove_strings(line.replace('\n', ''), removes).split(',')
    #             for line in content[idx+3:]
    #             if not self.should_ignore(line, ignores, invert=True)]
    #     print(data)
    #     return data

    def before_remove(self, text, l):
        """Return ``text`` with every substring listed in ``l`` removed."""
        result = text
        for fragment in l:
            result = result.replace(fragment, '')
        return result

    def parse_smi(self, content):
        """Extract text lines from SMI content.

        Only lines of the form ``<SYNC Start=...><P Class=...>text`` are
        kept; the text after the tags is returned, empty lines dropped.
        """
        ignores = [r'\<SYNC Start\=[0-9]+\>\<P Class\=[A-Z]+\>(.*)']
        removes = [r'\<SYNC Start\=[0-9]+\>\<P Class\=[A-Z]+\>(.*)']
        data = [
            self.remove_strings(
                self.before_remove(line, ['\n', '<br>', '&nbsp']), removes)
            for line in content
            if not self.should_ignore(line, ignores, invert=True)
        ]
        return [line for line in data if line != '']

    def parse_txt(self, content):
        """Extract text lines from plain-text subtitle content.

        Lines matching one of the ``ignores`` patterns are skipped;
        ``'[br]'`` and newlines are removed from the rest.
        """
        ignores = [r'^\[[A-Z ]+\]', r'[0-9:,]+']
        removes = ['[br]', '\n']
        data = [
            self.before_remove(line, removes) for line in content
            if not self.should_ignore(line, ignores)
        ]
        return [line for line in data if line != '']

    def get_sentenses(self):
        """Return one list of text lines per downloaded subtitle file.

        Parsed results are cached next to each file as ``<file>.pkl``.
        """
        files = self.reglob(self.dist, r'\d+\.(srt|sub|smi|txt)$')
        data = []
        for file in files:
            # If a pickle file exists, load it instead of parsing again.
            ppath = '{}.pkl'.format(file)
            if os.path.exists(ppath):
                # NOTE(review): pickle.load runs arbitrary code from the
                # file; only safe while the cache directory is trusted.
                with open(ppath, 'rb') as f:
                    sentenses = pickle.load(f)
            else:
                content, ext = self.readfile(file)
                # Dispatch to parse_srt / parse_sub / parse_smi / parse_txt.
                method = getattr(self, 'parse_{}'.format(ext))
                sentenses = method(content)
                with open(ppath, 'wb+') as f:
                    pickle.dump(sentenses, f)
            data.append(sentenses)
        return data
示例#36
0
res = {}
title = ""
imdb_movie_id_found = None
year = ""
subfilename = {}
for lang in ['eng', 'fre']:
    res[lang] = []
    done = False
    console.print(
        Markdown("> Searching OpenSubtitles for {} subtitles...".format(lang)))
    # objectif: sortir 2 sous-titres en fr, puis 2 sous-titres en eng
    # 1) essai meilleur qualité: match movie hash+size
    data = ost.search_subtitles([{
        'sublanguageid': lang,
        'moviehash': f.get_hash(),
        'moviebytesize': f.size
    }])

    def analyze_answer(hash_search):
        global data, res, lang, done, imdb_movie_id_found, title
        #console.print("{} result(s) found.".format(len(data)))
        for e in data:
            #console.print("# ",e['Score'], e['SubFileName'], e['IDMovieImdb'], e['MovieName'], e['SubAddDate'], e['SubEncoding'])
            #console.print(e['ZipDownloadLink'])
            if e['SubFormat'] == 'srt':
                #console.print("imdb movie id:", e['IDMovieImdb'])
                #console.print("imdb movie name:", e['MovieName'])
                if hash_search is True and imdb_movie_id_found is None:
                    imdb_movie_id_found = e['IDMovieImdb']
                    console.print("IMDB movie ID found:", imdb_movie_id_found)