def search_filename(filename, languages):
    """Derive a search strategy (tv show / movie / manual) from a raw filename."""
    title, year = xbmc.getCleanMovieTitle(filename)
    # Map '&' to 'and', drop leftover HTML entities, collapse whitespace runs.
    title = re.sub('&#.* ', '', title.replace("&", "and")).strip()
    title = ' '.join(part for part in title.split())
    log(__name__, "clean title: \"%s\" (%s)" % (title, year))
    try:
        year_num = int(year)
    except ValueError:
        year_num = 0
    # NOTE(review): newtitle is computed here but never used afterwards --
    # preserved as-is to keep behavior identical (including the possible
    # seasonsc KeyError for seasons not in the table).
    if re.search('[S|s]\d\d', filename):
        newtitle = ' '.join(part for part in title.split()
                            if not re.search('[S|s]\d\d', part))
        newtitle = '%s %s Season' % (
            newtitle, seasonsc[re.search('[S|s](\d\d)', filename).group(1)])
    ep_match = re.search(r'\WS(?P<season>\d\d)E(?P<episode>\d\d)', filename,
                         flags=re.IGNORECASE)
    if ep_match is not None:
        show_name = title[:ep_match.start('season') - 1].strip()
        season_no = ep_match.group('season').lstrip('0')
        episode_no = ep_match.group('episode').lstrip('0')
        search_tvshow(show_name, season_no, episode_no, languages, filename)
    elif title and year_num > 1900:
        search_movie(title, year, languages, filename)
    else:
        search_manual(filename, languages, filename)
def search_tvshow(tvshow, season, episode, languages, filename):
    """Search subscene for a tv show season and hand the season page to getallsubs()."""
    tvshow = prepare_search_string(tvshow)
    log(__name__, 'tvshow string "%s"' % tvshow)
    season_ordinal = seasons(season)
    # Alias lookup uses a normalized key: lowercase, apostrophes removed,
    # leading/trailing dots stripped.
    lookup_key = tvshow.lower().replace("'", "").strip(".")
    if lookup_key in aliases:
        log(__name__, 'found alias for "%s"' % lookup_key)
        tvshow = aliases[lookup_key]
    search_string = '{tvshow} - {season_ordinal} Season'.format(
        tvshow=tvshow, season_ordinal=season_ordinal)
    log(__name__, "Search tvshow = %s" % search_string)
    url = (main_url + "/subtitles/searching?q="
           + urllib.quote_plus(search_string) + '&r=true')
    content, resp = geturl(url)
    if content is None:
        return
    log(__name__, "Multiple tv show seasons found, searching for the right one ...")
    season_url = find_tv_show_season(content, tvshow, season_ordinal)
    if season_url is not None:
        log(__name__, "Tv show season found in list, getting subs ...")
        epstr = '{season}:{episode}'.format(season=season, episode=episode)
        getallsubs(main_url + season_url, languages, filename, epstr)
def find_tv_show_season(content, tvshow, season):
    """Pick the best matching season-page link from a subscene search page.

    Prefers title+season substring matches (most subtitles wins); otherwise
    falls back to the highest fuzzy score weighted by subtitle count.
    """
    seen_links = []
    title_matches = []
    all_tvshows = []
    h = HTMLParser.HTMLParser()
    for m in re.finditer(movie_season_pattern, content, re.IGNORECASE | re.DOTALL):
        found_title = h.unescape(m.group('title'))
        if m.group('link') in seen_links:
            continue  # skip duplicate result rows
        log(__name__, "Found tv show season on search page: %s" % found_title)
        seen_links.append(m.group('link'))
        # Fuzzy ratio weighted by subtitle count -- fallback ranking score.
        sm = difflib.SequenceMatcher(
            None, (found_title + ' ' + m.group('year')).lower(), tvshow.lower())
        all_tvshows.append(m.groups() + (sm.ratio() * int(m.group('numsubtitles')),))
        # Try to find match on title.
        if found_title.lower().find(tvshow.lower()) > -1:
            if found_title.lower().find(season.lower()) > -1:
                log(__name__,
                    "Matching tv show season found on search page: %s" % found_title)
                title_matches.append(m.groups())
            elif season.lower() == "first" and found_title.lower().find("season") == -1:
                # One-season-only series carry no "Season" suffix at all.
                log(__name__,
                    "Matching tv show (no season) found on search page: %s" % found_title)
                title_matches.append(m.groups())
    url_found = None
    if len(title_matches) > 0:
        title_matches = sorted(title_matches, key=lambda x: -int(x[3]))
        url_found = title_matches[0][0]
        log(__name__, "Selecting matching tv show with most subtitles: %s (%s)"
            % (title_matches[0][1], title_matches[0][3]))
    elif len(all_tvshows) > 0:
        all_tvshows = sorted(all_tvshows, key=lambda x: -int(x[4]))
        url_found = all_tvshows[0][0]
        log(__name__,
            "Selecting tv show with highest fuzzy string score: %s (score: %s subtitles: %s)"
            % (all_tvshows[0][1], all_tvshows[0][4], all_tvshows[0][3]))
    return url_found
def find_movie(content, title, year):
    """Resolve a movie subtitles-page link from a sectioned search result page.

    Priority: exact title+year match, then title with a one-off year, then
    whatever the site itself flagged as an "exact" section hit.
    """
    seen_links = []
    candidates = []
    h = HTMLParser.HTMLParser()
    for sec in re.finditer(search_section_pattern, content, re.IGNORECASE | re.DOTALL):
        log(__name__, sec.group('section'))
        for m in re.finditer(movie_season_pattern, sec.group('content'),
                             re.IGNORECASE | re.DOTALL):
            if m.group('link') in seen_links:
                continue  # duplicate rows across sections
            seen_links.append(m.group('link'))
            found_title = h.unescape(m.group('title'))
            log(__name__, "Found movie on search page: %s (%s)"
                % (found_title, m.group('year')))
            candidates.append({'t': found_title.lower(),
                               'y': int(m.group('year')),
                               's': sec.group('section'),
                               'l': m.group('link')})
    year = int(year)
    title = title.lower()
    # Priority 1: matching title and year
    for movie in candidates:
        if movie['t'].find(title) > -1 and movie['y'] == year:
            log(__name__, "Matching movie found on search page: %s (%s)"
                % (movie['t'], movie['y']))
            return movie['l']
    # Priority 2: matching title and one off year
    for movie in candidates:
        if movie['t'].find(title) > -1 and movie['y'] in (year + 1, year - 1):
            log(__name__,
                "Matching movie found on search page (one off year): %s (%s)"
                % (movie['t'], movie['y']))
            return movie['l']
    # Priority 3: "Exact" match according to search result page
    for movie in candidates:
        if movie["s"] == "exact":
            log(__name__, "Using 'Exact' match: %s (%s)" % (movie['t'], movie['y']))
            return movie['l']
    return None
def OSSearch(item):
    """Query OpenSubtitles and publish matching subtitles as directory items."""
    search_data = []
    try:
        search_data = OSDBServer().searchsubtitles(item)
    except:
        log(__name__, "failed to connect to service for subtitle search")
        xbmcgui.Dialog().ok(SCRIPT_NAME, __language__(32005), __language__(32009))
        return
    if search_data != None:
        def sort_key(entry):
            # Rank (False sorts first): moviehash match, exact filename match
            # against the playing file, original-title substring match,
            # preferred subtitle language.
            return [
                not entry['MatchedBy'] == 'moviehash',
                not os.path.splitext(entry['SubFileName'])[0] == os.path.splitext(
                    os.path.basename(urllib.unquote(
                        xbmc.Player().getPlayingFile().decode('utf-8'))))[0],
                not normalizeString(
                    xbmc.getInfoLabel("VideoPlayer.OriginalTitle")).lower()
                in entry['SubFileName'].replace('.', ' ').lower(),
                not entry['LanguageName'] == PreferredSub,
            ]

        search_data.sort(key=sort_key)
        for item_data in search_data:
            ## hack to work around issue where Brazilian is not found as language in XBMC
            if item_data["LanguageName"] == "Brazilian":
                item_data["LanguageName"] = "Portuguese (Brazil)"
            episode_matches = (item['season'] == item_data['SeriesSeason']
                               and item['episode'] == item_data['SeriesEpisode'])
            ## for file search, season and episode == ""
            is_file_search = (item['season'] == "" and item['episode'] == "")
            if episode_matches or is_file_search:
                listitem = xbmcgui.ListItem(
                    label=item_data["LanguageName"],
                    label2=item_data["SubFileName"],
                    iconImage=str(int(round(float(item_data["SubRating"]) / 2))),
                    thumbnailImage=item_data["ISO639"])
                listitem.setProperty(
                    "sync",
                    ("false", "true")[str(item_data["MatchedBy"]) == "moviehash"])
                listitem.setProperty(
                    "hearing_imp",
                    ("false", "true")[int(item_data["SubHearingImpaired"]) != 0])
                url = ("plugin://%s/?action=download&provide=opensubtitles&link=%s&ID=%s&filename=%s&format=%s"
                       % (SCRIPT_ID, item_data["ZipDownloadLink"],
                          item_data["IDSubtitleFile"], item_data["SubFileName"],
                          item_data["SubFormat"]))
                xbmcplugin.addDirectoryItem(handle=int(sys.argv[1]), url=url,
                                            listitem=listitem, isFolder=False)
def search(item):
    """Dispatch a subtitle search based on the fields of the invocation item."""
    filename = os.path.splitext(os.path.basename(item['file_original_path']))[0]
    log(__name__, "Search_subscene='%s', filename='%s', addon_version=%s"
        % (item, filename, __version__))
    if item['mansearch']:
        search_manual(item['mansearchstr'], item['3let_language'], filename)
        return
    if item['tvshow']:
        search_tvshow(item['tvshow'], item['season'], item['episode'],
                      item['3let_language'], filename)
        return
    if item['title'] and item['year']:
        search_movie(item['title'], item['year'], item['3let_language'], filename)
        return
    # Fallback: try to make sense of the raw filename.
    search_filename(filename, item['3let_language'])
def find_movie(content, title, year):
    """Return the link of the first search-page entry matching title and year."""
    h = HTMLParser.HTMLParser()
    for m in re.finditer(movie_season_pattern, content, re.IGNORECASE | re.DOTALL):
        found_title = h.unescape(m.group('title'))
        log(__name__, "Found movie on search page: %s (%s)"
            % (found_title, m.group('year')))
        # Substring title match plus exact (string) year match.
        if (found_title.lower().find(title.lower()) > -1
                and m.group('year') == year):
            log(__name__, "Matching movie found on search page: %s (%s)"
                % (found_title, m.group('year')))
            return m.group('link')
    return None
def search(item):
    """Dispatch a subtitle search; a bare title is parsed like a filename."""
    filename = os.path.splitext(os.path.basename(item['file_original_path']))[0]
    log(__name__, "Search_subscene='%s', filename='%s', addon_version=%s"
        % (item, filename, __version__))
    if item['mansearch']:
        search_manual(item['mansearchstr'], item['3let_language'], filename)
    elif item['tvshow']:
        search_tvshow(item['tvshow'], item['season'], item['episode'],
                      item['3let_language'], filename)
    elif item['title'] and item['year']:
        search_movie(item['title'], item['year'], item['3let_language'], filename)
    elif item['title']:
        # Title without a year: run the filename heuristics over the title.
        search_filename(item['title'], item['3let_language'])
    else:
        search_filename(filename, item['3let_language'])
def search_tvshow(tvshow, season, episode, languages, filename):
    """Search subscene for a tv show season (py2/py3 urllib compatible)."""
    tvshow = prepare_search_string(tvshow)
    # Alias lookup key: lowercase, no apostrophes, outer dots stripped.
    lookup_key = tvshow.lower().replace("'", "").strip(".")
    if lookup_key in aliases:
        log(__name__, 'found alias for "%s"' % lookup_key)
        tvshow = aliases[lookup_key]
    search_string = tvshow + " - " + seasons[int(season)] + " Season"
    log(__name__, "Search tvshow = %s" % search_string)
    if sys.version_info.major == 3:
        quoted = urllib.parse.quote_plus(search_string)
    else:
        quoted = urllib.quote_plus(search_string)
    url = main_url + "/subtitles/searchbytitle?query=" + quoted
    content, response_url = geturl(url)
    if content is None:
        return
    log(__name__, "Multiple tv show seasons found, searching for the right one ...")
    season_url = find_tv_show_season(content, tvshow, seasons[int(season)])
    if season_url is not None:
        log(__name__, "Tv show season found in list, getting subs ...")
        getallsubs(main_url + season_url, languages, filename,
                   "%d:%d" % (int(season), int(episode)))
def find_tv_show_season(content, tvshow, season):
    """Pick a season-page link: title+season substring match first, fuzzy score second."""
    url_found = None
    title_matches = []
    all_tvshows = []
    h = HTMLParser.HTMLParser()
    for m in re.finditer(movie_season_pattern, content, re.IGNORECASE | re.DOTALL):
        found_title = h.unescape(m.group('title'))
        log(__name__, "Found tv show season on search page: %s" % found_title)
        # Fuzzy ratio weighted by subtitle count (fallback ranking).
        sm = difflib.SequenceMatcher(
            None, (found_title + ' ' + m.group('year')).lower(), tvshow.lower())
        all_tvshows.append(m.groups() + (sm.ratio() * int(m.group('numsubtitles')),))
        # Require the show name followed by a space, then the season ordinal.
        if (found_title.lower().find(tvshow.lower() + " ") > -1
                and found_title.lower().find(season.lower()) > -1):
            log(__name__,
                "Matching tv show season found on search page: %s" % found_title)
            title_matches.append(m.groups())
    if len(title_matches) > 0:
        title_matches = sorted(title_matches, key=lambda x: -int(x[3]))
        url_found = title_matches[0][0]
        log(__name__, "Selecting matching tv show with most subtitles: %s (%s)" % (
            title_matches[0][1], title_matches[0][3]))
    elif len(all_tvshows) > 0:
        all_tvshows = sorted(all_tvshows, key=lambda x: -int(x[4]))
        url_found = all_tvshows[0][0]
        log(__name__, "Selecting tv show with highest fuzzy string score: %s (score: %s subtitles: %s)" % (
            all_tvshows[0][1], all_tvshows[0][4], all_tvshows[0][3]))
    return url_found
def search_filename(filename, languages):
    """Parse a bare filename and route to tv show, movie, or manual search."""
    title, year = xbmc.getCleanMovieTitle(filename)
    log(__name__, "clean title: \"%s\" (%s)" % (title, year))
    try:
        year_num = int(year)
    except ValueError:
        year_num = 0
    ep_match = re.search(r'\WS(?P<season>\d\d)E(?P<episode>\d\d)', filename,
                         flags=re.IGNORECASE)
    if ep_match is not None:
        # Everything before the SxxEyy marker is treated as the show name.
        show_name = title[:ep_match.start('season') - 1].strip()
        search_tvshow(show_name,
                      ep_match.group('season').lstrip('0'),
                      ep_match.group('episode').lstrip('0'),
                      languages, filename)
    elif title and year_num > 1900:
        search_movie(title, year, languages, filename)
    else:
        search_manual(filename, languages, filename)
def search(item):
    """Dispatch a subtitle search based on the invocation item (debug-traced)."""
    filename = os.path.splitext(os.path.basename(item['file_original_path']))[0]
    log(__name__, item)
    if item['mansearch']:
        debug('mansearch')
        search_manual(item['mansearchstr'], item['3let_language'], filename)
    elif item['tvshow']:
        debug('tvshow')
        search_tvshow(item['tvshow'], item['season'], item['episode'],
                      item['3let_language'], filename)
    elif item['title'] and item['year']:
        debug('search_movie')
        search_movie(item['title'], item['year'], item['3let_language'], filename)
    elif item['title']:
        # NOTE(review): this branch passes the parsed filename rather than
        # item['title'], making it identical to the final fallback -- confirm
        # whether item['title'] was intended here.
        debug('search_filename')
        search_filename(filename, item['3let_language'])
    else:
        debug('search_filename2')
        search_filename(filename, item['3let_language'])
def find_movie(content, title, year):
    """Pick a movie page link from a sectioned search page by descending priority."""
    seen_links = []
    candidates = []
    h = HTMLParser.HTMLParser()
    for sec in re.finditer(search_section_pattern, content,
                           re.IGNORECASE | re.DOTALL):
        log(__name__, sec.group('section'))
        for m in re.finditer(movie_season_pattern, sec.group('content'),
                             re.IGNORECASE | re.DOTALL):
            link = m.group('link')
            if link in seen_links:
                continue
            seen_links.append(link)
            found_title = h.unescape(m.group('title'))
            log(__name__, "Found movie on search page: %s (%s)"
                % (found_title, m.group('year')))
            candidates.append({'t': found_title.lower(),
                               'y': int(m.group('year')),
                               's': sec.group('section'),
                               'l': link})
    year = int(year)
    title = title.lower()
    # Priority 1: matching title and year
    for movie in candidates:
        if movie['t'].find(title) > -1 and movie['y'] == year:
            log(__name__, "Matching movie found on search page: %s (%s)"
                % (movie['t'], movie['y']))
            return movie['l']
    # Priority 2: matching title and one off year
    for movie in candidates:
        if movie['t'].find(title) > -1 and (movie['y'] == year + 1
                                            or movie['y'] == year - 1):
            log(__name__,
                "Matching movie found on search page (one off year): %s (%s)"
                % (movie['t'], movie['y']))
            return movie['l']
    # Priority 3: "Exact" match according to search result page
    for movie in candidates:
        if movie["s"] == "exact":
            log(__name__, "Using 'Exact' match: %s (%s)" % (movie['t'], movie['y']))
            return movie['l']
    return None
def find_tv_show_season(content, tvshow, season):
    """Pick a season-page link; py2/py3 compatible HTML unescaping."""
    url_found = None
    seen_links = []
    title_matches = []
    all_tvshows = []
    # Python 3 has html.unescape as a module function; Python 2 needs an
    # HTMLParser instance -- both expose .unescape().
    if sys.version_info.major == 3:
        import html
    else:
        html = HTMLParser.HTMLParser()
    for m in re.finditer(movie_season_pattern, content, re.IGNORECASE | re.DOTALL):
        found_title = html.unescape(m.group('title'))
        if m.group('link') in seen_links:
            continue  # duplicate result row
        log(__name__, "Found tv show season on search page: %s" % found_title)
        seen_links.append(m.group('link'))
        ratio = difflib.SequenceMatcher(
            None, (found_title + ' ' + m.group('year')).lower(),
            tvshow.lower()).ratio()
        all_tvshows.append(m.groups() + (ratio * int(m.group('numsubtitles')),))
        if (found_title.lower().find(tvshow.lower() + " ") > -1
                and found_title.lower().find(season.lower()) > -1):
            log(__name__,
                "Matching tv show season found on search page: %s" % found_title)
            title_matches.append(m.groups())
    if len(title_matches) > 0:
        title_matches = sorted(title_matches, key=lambda x: -int(x[3]))
        url_found = title_matches[0][0]
        log(__name__, "Selecting matching tv show with most subtitles: %s (%s)"
            % (title_matches[0][1], title_matches[0][3]))
    elif len(all_tvshows) > 0:
        all_tvshows = sorted(all_tvshows, key=lambda x: -int(x[4]))
        url_found = all_tvshows[0][0]
        log(__name__,
            "Selecting tv show with highest fuzzy string score: %s (score: %s subtitles: %s)"
            % (all_tvshows[0][1], all_tvshows[0][4], all_tvshows[0][3]))
    return url_found
def search_tvshow_google_edition(tvshow, season, episode, languages, filename, year):
    """Search for a tv show season via a Google query scoped to subscene.com."""
    tvshow = prepare_search_string(tvshow)
    season_ordinal = seasons(season)
    lookup_key = tvshow.lower().replace("'", "").strip(".")
    if lookup_key in aliases:
        log(__name__, 'found alias for "%s"' % lookup_key)
        tvshow = aliases[lookup_key]
    search_string = '{tvshow} - {season_ordinal} Season'.format(
        tvshow=tvshow, season_ordinal=season_ordinal)
    log(__name__, "Search tvshow = %s" % search_string)
    url = ("https://www.google.com/search?q=subscene.com+"
           + urllib.quote_plus(search_string))
    content, response_url = geturl(url)
    if content is None:
        return
    season_url = find_tv_show_season_google_edition(content, tvshow,
                                                    season_ordinal, year)
    if season_url is not None:
        log(__name__, "Tv show season found in list, getting subs ...")
        # Google results carry absolute URLs, so no main_url prefix here.
        epstr = '{season}:{episode}'.format(season=season, episode=episode)
        getallsubs(season_url, languages, filename, epstr)
def OSDownload(id, url, format, stack=False):
    """Download a subtitle from OpenSubtitles.

    Tries the XMLRPC download first (unless the movie is a stack, which needs
    the multi-sub zip), then falls back to fetching the zip over HTTP and
    extracting it into TEMP.

    Returns a list of subtitle file paths; empty on failure.
    """
    log(__name__, "Download OpenSubtitles")
    subtitle_list = []
    exts = [".srt", ".sub", ".txt", ".smi", ".ssa", ".ass"]
    if stack:
        ## we only want XMLRPC download if movie is not in stack,
        ## you can only retreive multiple subs in zip
        result = False
    else:
        subtitle = os.path.join(TEMP, "%s.%s" % (str(uuid.uuid4()), format))
        try:
            result = OSDBServer().download(id, subtitle)
        except:
            log(__name__, "failed to connect to service for subtitle download")
            return subtitle_list
    if not result:
        log(__name__, "Download Using HTTP")
        zip = os.path.join(TEMP, "OpenSubtitles.zip")
        # Fix: close the HTTP response; it was previously leaked.
        f = urllib.urlopen(url)
        try:
            with open(zip, "wb") as subFile:
                subFile.write(f.read())
        finally:
            f.close()
        xbmc.sleep(500)
        xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (
            zip,
            TEMP,
        )).encode('utf-8'), True)
        for file in xbmcvfs.listdir(zip)[1]:
            file = os.path.join(TEMP, file)
            if (os.path.splitext(file)[1] in exts):
                subtitle_list.append(file)
    else:
        subtitle_list.append(subtitle)
    # Fix: guard against an empty list before indexing -- the original raised
    # IndexError when the archive contained no subtitle files.
    if subtitle_list and xbmcvfs.exists(subtitle_list[0]):
        return subtitle_list
    # Consistent failure value: an empty list (the except path above already
    # returned one) instead of an implicit None.
    return []
def search_tvshow(tvshow, season, episode, languages, filename):
    """Search subscene's /subtitles/title endpoint for a tv show season."""
    tvshow = prepare_search_string(tvshow)
    lookup_key = tvshow.lower().replace("'", "").strip(".")
    if lookup_key in aliases:
        log(__name__, 'found alias for "%s"' % lookup_key)
        tvshow = aliases[lookup_key]
    search_string = tvshow + " - " + seasons[int(season)] + " Season"
    log(__name__, "Search tvshow = %s" % search_string)
    url = (main_url + "/subtitles/title?q="
           + urllib.quote_plus(search_string) + '&r=true')
    content, response_url = geturl(url)
    if content is None:
        return
    log(__name__, "Multiple tv show seasons found, searching for the right one ...")
    season_url = find_tv_show_season(content, tvshow, seasons[int(season)])
    if season_url is not None:
        log(__name__, "Tv show season found in list, getting subs ...")
        getallsubs(main_url + season_url, languages, filename,
                   "%d:%d" % (int(season), int(episode)))
def search_tvshow(tvshow, season, episode, languages, filename):
    """Search for a tv show season, fetch the season page, and scan it for subs."""
    tvshow = tvshow.strip()
    search_string = (prepare_search_string(tvshow)
                     + " - " + seasons[int(season)] + " Season")
    log(__name__, "Search tvshow = %s" % search_string)
    url = (main_url + "/subtitles/title?q="
           + urllib.quote_plus(search_string) + '&r=true')
    content, response_url = geturl(url)
    if content is None:
        return
    log(__name__, "Multiple tv show seasons found, searching for the right one ...")
    season_url = find_tv_show_season(content, tvshow, seasons[int(season)])
    if season_url is None:
        return
    log(__name__, "Tv show season found in list, getting subs ...")
    content, response_url = geturl(main_url + season_url)
    if content is not None:
        # e.g. "s01e05" -- filters the season page for the right episode.
        getallsubs(content, languages, filename,
                   "s%#02de%#02d" % (int(season), int(episode)))
def search_tvshow(tvshow, season, episode, languages, filename):
    """Search subscene for a tv show season and pass the page to getallsubs()."""
    tvshow = prepare_search_string(tvshow)
    search_string = tvshow + " - " + seasons[int(season)] + " Season"
    log(__name__, "Search tvshow = %s" % search_string)
    url = (main_url + "/subtitles/title?q="
           + urllib.quote_plus(search_string) + '&r=true')
    content, response_url = geturl(url)
    if content is None:
        return
    log(__name__, "Multiple tv show seasons found, searching for the right one ...")
    season_url = find_tv_show_season(content, tvshow, seasons[int(season)])
    if season_url is not None:
        log(__name__, "Tv show season found in list, getting subs ...")
        getallsubs(main_url + season_url, languages, filename,
                   "%d:%d" % (int(season), int(episode)))
def search_movie(title, year, languages, filename):
    """Search subscene by movie title, retrying with '&' replaced by 'and'."""
    title = title.strip()
    search_string = prepare_search_string(title)
    log(__name__, "Search movie = %s" % search_string)
    url = (main_url + "/subtitles/title?q="
           + urllib.quote_plus(search_string) + '&r=true')
    content, response_url = geturl(url)
    if content is None:
        return
    log(__name__, "Multiple movies found, searching for the right one ...")
    subspage_url = find_movie(content, title, year)
    if subspage_url is None:
        log(__name__, "Movie not found in list: %s" % title)
        if title.lower().find("&") > -1:
            # Retry once with ampersands spelled out.
            title = title.replace("&", "and")
            log(__name__, "Trying searching with replacing '&' to 'and': %s" % title)
            subspage_url = find_movie(content, title, year)
            if subspage_url is None:
                log(__name__, "Movie not found in list: %s" % title)
    if subspage_url is not None:
        log(__name__, "Movie found in list, getting subs ...")
        subs_content, response_url = geturl(main_url + subspage_url)
        if subs_content is not None:
            getallsubs(subs_content, languages, filename)
def search_movie(title, year, languages, filename):
    """Search subscene by movie title (py2/py3 urllib compatible)."""
    title = prepare_search_string(title)
    log(__name__, "Search movie = %s" % title)
    if sys.version_info.major == 3:
        quoted = urllib.parse.quote_plus(title)
    else:
        quoted = urllib.quote_plus(title)
    url = main_url + "/subtitles/searchbytitle?query=" + quoted
    content, response_url = geturl(url)
    if content is None:
        return
    log(__name__, "Multiple movies found, searching for the right one ...")
    subspage_url = find_movie(content, title, year)
    if subspage_url is None:
        log(__name__, "Movie not found in list: %s" % title)
        if title.lower().find("&") > -1:
            # Retry once with ampersands spelled out.
            title = title.replace("&", "and")
            log(__name__, "Trying searching with replacing '&' to 'and': %s" % title)
            subspage_url = find_movie(content, title, year)
            if subspage_url is None:
                log(__name__, "Movie not found in list: %s" % title)
    if subspage_url is not None:
        log(__name__, "Movie found in list, getting subs ...")
        getallsubs(main_url + subspage_url, languages, filename)
def getallsubs(url, allowed_languages, filename="", episode=""):
    """Scrape a subscene subtitles page and register every allowed-language sub.

    When `episode` is given ("season:episode"), only names matching the
    episode pattern are kept, plus season packs (more than two files).
    """
    subtitle_pattern = ("<td class=\"a1\">\s+<a href=\"(?P<link>/subtitles/[^\"]+)\">\s+"
                        "<span class=\"[^\"]+ (?P<quality>\w+-icon)\">\s+(?P<language>[^\r\n\t]+)\s+</span>\s+"
                        "<span>\s+(?P<filename>[^\r\n\t]+)\s+</span>\s+"
                        "</a>\s+</td>\s+"
                        "<td class=\"[^\"]+\">\s+(?P<numfiles>[^\r\n\t]*)\s+</td>\s+"
                        "<td class=\"(?P<hiclass>[^\"]+)\">"
                        "(?:.*?)<td class=\"a6\">\s+<div>\s+(?P<comment>[^\"]+) \s*</div>")
    codes = get_language_codes(allowed_languages)
    if len(codes) < 1:
        xbmc.executebuiltin((u'Notification(%s,%s)'
                             % (__scriptname__, __language__(32004))).encode('utf-8'))
        return
    log(__name__, 'LanguageFilter=' + ','.join(codes))
    content, response_url = geturl(url, 'LanguageFilter=' + ','.join(codes))
    if content is None:
        return
    subtitles = []
    h = HTMLParser.HTMLParser()
    episode_regex = None
    if episode != "":
        episode_regex = re.compile(get_episode_pattern(episode), re.IGNORECASE)
        log(__name__, "regex: %s" % get_episode_pattern(episode))
    for m in re.finditer(subtitle_pattern, content, re.IGNORECASE | re.DOTALL):
        numfiles = 1
        if m.group('numfiles') != "":
            numfiles = int(m.group('numfiles'))
        language_info = subscene_languages[m.group('language')]
        if not (language_info and language_info['3let'] in allowed_languages):
            continue
        link = main_url + m.group('link')
        subtitle_name = m.group('filename').strip()
        hearing_imp = (m.group('hiclass') == "a41")
        # "bad" rated subs are skipped entirely; "positive" maps to rating 5.
        if m.group('quality') == "bad-icon":
            continue
        rating = '5' if m.group('quality') == "positive-icon" else '0'
        comment = re.sub("[\r\n\t]+", " ", h.unescape(m.group('comment').strip()))
        sync = filename != "" and filename.lower() == subtitle_name.lower()
        if episode != "":
            log(__name__, "match: " + subtitle_name)
            if episode_regex.search(subtitle_name):
                subtitles.append({'rating': rating, 'filename': subtitle_name,
                                  'sync': sync, 'link': link,
                                  'lang': language_info,
                                  'hearing_imp': hearing_imp,
                                  'comment': comment})
            elif numfiles > 2:
                # Season pack: annotate the name with the file count.
                subtitle_name = (subtitle_name + ' '
                                 + (__language__(32001) % int(m.group('numfiles'))))
                subtitles.append({'rating': rating, 'filename': subtitle_name,
                                  'sync': sync, 'link': link,
                                  'lang': language_info,
                                  'hearing_imp': hearing_imp,
                                  'comment': comment, 'episode': episode})
        else:
            subtitles.append({'rating': rating, 'filename': subtitle_name,
                              'sync': sync, 'link': link,
                              'lang': language_info,
                              'hearing_imp': hearing_imp,
                              'comment': comment})
    # Synced subs first, then the preferred language.
    subtitles.sort(key=lambda x: [not x['sync'],
                                  not x['lang']['name'] == PreferredSub])
    for sub in subtitles:
        append_subtitle(sub)
def find_movie(content, title, year):
    """Pick a movie page link using exact/close section flags from the search page.

    Priority: title+year, title with one-off year, site-flagged "exact"
    section hit, then the "close" hit with the most subtitles.
    """
    link_index = {}   # link -> position in candidates, for flag merging
    candidates = []
    h = HTMLParser.HTMLParser()
    for sec in re.finditer(search_section_pattern, content,
                           re.IGNORECASE | re.DOTALL):
        log(__name__, sec.group('section'))
        section = sec.group('section')
        for m in re.finditer(movie_season_pattern, sec.group('content'),
                             re.IGNORECASE | re.DOTALL):
            link = m.group('link')
            if link in link_index:
                # Seen under another section already: just merge the flags.
                if section == 'close':
                    candidates[link_index[link]]['is_close'] = True
                if section == 'exact':
                    candidates[link_index[link]]['is_exact'] = True
                continue
            link_index[link] = len(candidates)
            found_title = h.unescape(m.group('title'))
            log(__name__, "Found movie on search page: %s (%s)"
                % (found_title, m.group('year')))
            candidates.append({'t': found_title.lower(),
                               'y': int(m.group('year')),
                               'is_exact': section == 'exact',
                               'is_close': section == 'close',
                               'l': link,
                               'c': int(m.group('numsubtitles'))})
    year = int(year)
    title = title.lower()
    # Priority 1: matching title and year
    for movie in candidates:
        if movie['t'].find(title) > -1 and movie['y'] == year:
            log(__name__, "Matching movie found on search page: %s (%s)"
                % (movie['t'], movie['y']))
            return movie['l']
    # Priority 2: matching title and one off year
    for movie in candidates:
        if movie['t'].find(title) > -1 and (movie['y'] == year + 1
                                            or movie['y'] == year - 1):
            log(__name__,
                "Matching movie found on search page (one off year): %s (%s)"
                % (movie['t'], movie['y']))
            return movie['l']
    # Priority 3: "Exact" match according to search result page
    close_movies = []
    for movie in candidates:
        if movie['is_exact']:
            log(__name__, "Using 'Exact' match: %s (%s)" % (movie['t'], movie['y']))
            return movie['l']
        if movie['is_close']:
            close_movies.append(movie)
    # Priority 4: "Close" match according to search result page
    if len(close_movies) > 0:
        close_movies = sorted(close_movies, key=itemgetter('c'), reverse=True)
        log(__name__, "Using 'Close' match: %s (%s)"
            % (close_movies[0]['t'], close_movies[0]['y']))
        return close_movies[0]['l']
    return None
def download(link, episode=""):
    # Download a subtitle archive from a subscene detail page, extract it into
    # a fresh temp directory, and return the list of subtitle file paths
    # (filtered by the episode pattern when `episode` is given as
    # "season:episode"). Returns an empty list on failure.
    subtitle_list = []
    exts = [".srt", ".sub", ".txt", ".smi", ".ssa", ".ass"]
    downloadlink_pattern = "...<a href=\"(.+?)\" rel=\"nofollow\" onclick=\"DownloadSubtitle"
    uid = uuid.uuid4()
    # Per-download temp directory so concurrent downloads cannot collide.
    tempdir = os.path.join(__temp__, unicode(uid))
    xbmcvfs.mkdirs(tempdir)
    content, response_url = geturl(link)
    match = re.compile(downloadlink_pattern).findall(content)
    if match:
        downloadlink = main_url + match[0]
        # The site expects an ASP.NET-style form POST; these placeholder
        # values are sent as the form fields.
        viewstate = 0
        previouspage = 0
        subtitleid = 0
        typeid = "zip"
        filmid = 0
        postparams = urllib.urlencode(
            {'__EVENTTARGET': 's$lc$bcr$downloadLink',
             '__EVENTARGUMENT': '',
             '__VIEWSTATE': viewstate,
             '__PREVIOUSPAGE': previouspage,
             'subtitleId': subtitleid,
             'typeId': typeid,
             'filmId': filmid})
        useragent = ("User-Agent=Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3) "
                     "Gecko/20100401 Firefox/3.6.3 ( .NET CLR 3.5.30729)")
        headers = {'User-Agent': useragent, 'Referer': link}
        log(__name__, "Fetching subtitles using url '%s' with referer header '%s' and post parameters '%s'" % (
            downloadlink, link, postparams))
        request = urllib2.Request(downloadlink, postparams, headers)
        response = urllib2.urlopen(request)
        if response.getcode() != 200:
            log(__name__, "Failed to download subtitle file")
            return subtitle_list
        local_tmp_file = os.path.join(tempdir, "subscene.xxx")
        packed = False
        try:
            log(__name__, "Saving subtitles to '%s'" % local_tmp_file)
            local_file_handle = xbmcvfs.File(local_tmp_file, "wb")
            local_file_handle.write(response.read())
            local_file_handle.close()
            # Check archive type (rar/zip/else) through the file header (rar=Rar!, zip=PK)
            myfile = xbmcvfs.File(local_tmp_file, "rb")
            myfile.seek(0,0)
            if myfile.read(1) == 'R':
                typeid = "rar"
                packed = True
                log(__name__, "Discovered RAR Archive")
            else:
                myfile.seek(0,0)
                if myfile.read(1) == 'P':
                    typeid = "zip"
                    packed = True
                    log(__name__, "Discovered ZIP Archive")
                else:
                    typeid = "srt"
                    packed = False
                    log(__name__, "Discovered a non-archive file")
            myfile.close()
            # Rename with the detected extension so Extract/players accept it.
            local_tmp_file = os.path.join(tempdir, "subscene." + typeid)
            xbmcvfs.rename(os.path.join(tempdir, "subscene.xxx"), local_tmp_file)
            log(__name__, "Saving to %s" % local_tmp_file)
        except:
            log(__name__, "Failed to save subtitle to %s" % local_tmp_file)
        if packed:
            xbmc.sleep(500)
            xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (local_tmp_file, tempdir,)).encode('utf-8'), True)
        episode_pattern = None
        if episode != '':
            episode_pattern = re.compile(get_episode_pattern(episode), re.IGNORECASE)
        # Collect subtitle files from extracted subdirectories ...
        for dir in xbmcvfs.listdir(tempdir)[0]:
            for file in xbmcvfs.listdir(os.path.join(tempdir, dir))[1]:
                if os.path.splitext(file)[1] in exts:
                    log(__name__, 'match '+episode+' '+file)
                    if episode_pattern and not episode_pattern.search(file):
                        continue
                    log(__name__, "=== returning subtitle file %s" % file)
                    subtitle_list.append(os.path.join(tempdir, dir, file))
        # ... and from the temp directory root.
        for file in xbmcvfs.listdir(tempdir)[1]:
            if os.path.splitext(file)[1] in exts:
                log(__name__, 'match '+episode+' '+file)
                if episode_pattern and not episode_pattern.search(file):
                    continue
                log(__name__, "=== returning subtitle file %s" % file)
                subtitle_list.append(os.path.join(tempdir, file))
        if len(subtitle_list) == 0:
            # Notify the user: 32002 = no sub for this episode, 32003 = none at all.
            if episode:
                xbmc.executebuiltin((u'Notification(%s,%s)' % (__scriptname__, __language__(32002))).encode('utf-8'))
            else:
                xbmc.executebuiltin((u'Notification(%s,%s)' % (__scriptname__, __language__(32003))).encode('utf-8'))
    return subtitle_list
def download(link, episode=""):
    # Python-2/3 dual-path variant of download(): fetch the subtitle archive
    # behind a subscene detail page with retries, unpack it, and return local
    # subtitle file paths.
    #
    # link:    URL of the subscene subtitle detail page.
    # episode: optional episode tag used to filter files out of season packs.
    # Returns: list of local subtitle file paths (possibly empty).
    subtitle_list = []
    exts = [".srt", ".sub", ".txt", ".smi", ".ssa", ".ass"]
    downloadlink_pattern = "...<a href=\"(.+?)\" rel=\"nofollow\" onclick=\"DownloadSubtitle"
    uid = uuid.uuid4()
    # unicode() does not exist on Python 3, hence the version switch.
    if sys.version_info.major == 3:
        tempdir = os.path.join(__temp__, str(uid))
    else:
        tempdir = os.path.join(__temp__, unicode(uid))
    try:
        os.makedirs(tempdir)
    except OSError:
        log(__name__, "Failed to create temp directory " + tempdir)
    else:
        log(__name__, "Successfully created temp directory " + tempdir)
    content, response_url = geturl(link)
    # geturl may return bytes on py3; force str so the regex below applies.
    content = str(content)
    match = re.compile(downloadlink_pattern).findall(content)
    if match:
        downloadlink = main_url + match[0]
        # Dummy ASP.NET post-back fields expected by the download handler.
        viewstate = 0
        previouspage = 0
        subtitleid = 0
        typeid = "zip"
        filmid = 0
        if sys.version_info.major == 3:
            # py3 urlopen needs the POST body as bytes, hence .encode().
            postparams = urllib.parse.urlencode(
                {'__EVENTTARGET': 's$lc$bcr$downloadLink',
                 '__EVENTARGUMENT': '',
                 '__VIEWSTATE': viewstate,
                 '__PREVIOUSPAGE': previouspage,
                 'subtitleId': subtitleid,
                 'typeId': typeid,
                 'filmId': filmid}).encode()
        else:
            postparams = urllib.urlencode(
                {'__EVENTTARGET': 's$lc$bcr$downloadLink',
                 '__EVENTARGUMENT': '',
                 '__VIEWSTATE': viewstate,
                 '__PREVIOUSPAGE': previouspage,
                 'subtitleId': subtitleid,
                 'typeId': typeid,
                 'filmId': filmid})
        # useragent = ("User-Agent=Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3) "
        #              "Gecko/20100401 Firefox/3.6.3 ( .NET CLR 3.5.30729)")
        # useragent = ("User-Agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.89 Safari/537.36")
        useragent = ("Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0")
        # Referer header is required or the site refuses the download.
        headers = {'User-Agent': useragent, 'Referer': link}
        log(__name__, "Fetching subtitles using url '%s' with referer header '%s' and post parameters '%s'" % (
            downloadlink, link, postparams))
        # Neutral extension; renamed once the archive type is sniffed.
        local_tmp_file = os.path.join(tempdir, "subscene.xxx")
        time_interval = 3
        num_of_retries = 5
        if sys.version_info.major == 3:
            request = urllib.request.Request(downloadlink, postparams, headers)
            # Retry loop; the for/else 'else' runs only when no break occurred,
            # i.e. every attempt failed.
            for _ in range(num_of_retries):
                try:
                    response = urllib.request.urlopen(request)
                except:
                    typ, val, tb = sys.exc_info()
                    log(__name__, traceback.format_exception(typ, val, tb))
                    num_of_retries -= 1
                    # If there aren't any retries - exit loop and raise error
                    if not num_of_retries:
                        log(__name__, "Download failure, giving up")
                        pass
                    log(__name__, "Download failure, %s attempts left" % num_of_retries)
                    time.sleep(time_interval)
                else:
                    local_tmp_file = os.path.join(tempdir, "subscene.xxx")
                    log(__name__, "Saving downloaded file to '%s'" % local_tmp_file)
                    local_file_handle = xbmcvfs.File(local_tmp_file, "w")
                    local_file_handle.write(bytearray(response.read()))
                    local_file_handle.close()
                    break
            else:
                log(__name__, "Download failure, returning empty subtitle list")
                return subtitle_list
        else:
            request = urllib2.Request(downloadlink, postparams, headers)
            for _ in range(num_of_retries):
                try:
                    response = urllib2.urlopen(request)
                except urllib2.URLError:
                    typ, val, tb = sys.exc_info()
                    log(__name__, traceback.format_exception(typ, val, tb))
                    num_of_retries -= 1
                    # If there aren't any retries - exit loop and raise error
                    if not num_of_retries:
                        log(__name__, "Download failure, giving up")
                        raise
                    log(__name__, "Download failure, %s attempts left" % num_of_retries)
                    time.sleep(time_interval)
                else:
                    log(__name__, "Saving downloaded file to '%s'" % local_tmp_file)
                    local_file_handle = xbmcvfs.File(local_tmp_file, "wb")
                    local_file_handle.write(response.read())
                    local_file_handle.close()
                    break
            else:
                raise
        if response is not None and response.getcode() != 200:
            log(__name__, "Failed to download subtitle file, HTTP error %s" % response.getcode())
            return subtitle_list
        packed = False
        try:
            log(__name__, "Checking archive type")
            # Check archive type (rar/zip/else) through the file header (rar=Rar!, zip=PK)
            myfile = xbmcvfs.File(local_tmp_file, "rb")
            myfile.seek(0, 0)
            if myfile.read(1) == 'R':
                typeid = "rar"
                packed = True
                log(__name__, "Discovered RAR Archive")
            else:
                myfile.seek(0, 0)
                if myfile.read(1) == 'P':
                    typeid = "zip"
                    packed = True
                    log(__name__, "Discovered ZIP Archive")
                else:
                    typeid = "srt"
                    packed = False
                    log(__name__, "Discovered a non-archive file")
            myfile.close()
            local_tmp_file = os.path.join(tempdir, "subscene." + typeid)
            xbmcvfs.rename(os.path.join(tempdir, "subscene.xxx"), local_tmp_file)
            log(__name__, "Saving to %s" % local_tmp_file)
        except:
            # NOTE(review): bare except — failures here are logged only.
            log(__name__, "Failed to save subtitle to %s" % local_tmp_file)
        if packed:
            xbmc.sleep(500)
            # Linux Kodi reads archives directly; Android/Windows need the
            # archive:// or zip:// VFS protocols, probed below in turn.
            if (sys.platform == "linux" or sys.platform == "linux2") and not 'ANDROID_ROOT' in list(os.environ.keys()):
                platform = "linux"
                log(__name__, "Platform identified as Linux")
            else:
                platform = "non-linux"
                log(__name__, "Platform identified as Non-Linux")
            if sys.version_info.major == 3:
                log(__name__, "Checking '%s' for subtitle files to copy" % local_tmp_file)
                if platform == "linux":
                    (dirs, files) = xbmcvfs.listdir('%s' % xbmcvfs.translatePath(local_tmp_file))
                    if len(files) == 0:
                        log(__name__, "Trying trailing slash")
                        (dirs, files) = xbmcvfs.listdir('%s/' % xbmcvfs.translatePath(local_tmp_file))
                    if len(files) == 0:
                        log(__name__, "Trying zip://")
                        (dirs, files) = xbmcvfs.listdir('zip://%s/' % urllib.parse.quote_plus(local_tmp_file))
                else:
                    # Kodi on windows and possibly Android requires archive:// protocol, so testing both
                    log(__name__, "Trying archive:\\\\")
                    (dirs, files) = xbmcvfs.listdir('archive:\\\\%s' % xbmcvfs.translatePath(urllib.parse.quote_plus(local_tmp_file)))
                    if len(files) == 0:
                        log(__name__, "Trying directly")
                        (dirs, files) = xbmcvfs.listdir('%s' % xbmcvfs.translatePath(local_tmp_file))
                    if len(files) == 0:
                        log(__name__, "Trying zip://")
                        (dirs, files) = xbmcvfs.listdir('zip://%s/' % urllib.parse.quote_plus(local_tmp_file))
                # Copy each archived file out of the archive into tempdir,
                # falling back through the same protocol variants.
                for file in files:
                    dest = os.path.join(tempdir, file)
                    log(__name__, "=== Found subtitle file %s" % dest)
                    if platform == "linux":
                        # Kodi on linux does not understand 'archive://' protocol
                        src = os.path.join(local_tmp_file, file)
                        log(__name__, "trying to copy '%s' to '%s'" % (src, dest))
                        if not xbmcvfs.copy(src, dest):
                            log(__name__, "copying failed")
                        else:
                            log(__name__, "copying succeeded")
                    else:
                        # Kodi on windows and possibly Android requires archive:// protocol, so testing both
                        src = xbmcvfs.translatePath(os.path.join("archive:\\\\%s" % urllib.parse.quote_plus(local_tmp_file), file))
                        log(__name__, "trying to copy '%s' to '%s'" % (src, dest))
                        if not xbmcvfs.copy(src, dest):
                            log(__name__, "copying failed")
                            # trying again
                            src = os.path.join(local_tmp_file, file)
                            log(__name__, "trying to copy '%s' to '%s'" % (src, dest))
                            if not xbmcvfs.copy(src, dest):
                                # trying yet again
                                src = 'zip://%s/' % urllib.parse.quote_plus(os.path.join(local_tmp_file, file))
                                if not xbmcvfs.copy(src, dest):
                                    log(__name__, "copying failed")
                                else:
                                    log(__name__, "copying succeeded using zip://")
                            else:
                                log(__name__, "copying succeeded using directly")
                        else:
                            log(__name__, "copying succeeded using archive:\\\\")
                    subtitle_list.append(dest)
            else:
                # py2: let Kodi extract the whole archive into tempdir.
                log(__name__, "Extracting '%s' to '%s'" % (local_tmp_file, tempdir))
                xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (local_tmp_file, tempdir,)).encode('utf-8'), True)
                for file in xbmcvfs.listdir(local_tmp_file)[1]:
                    file = os.path.join(tempdir, file)
                    if os.path.splitext(file)[1] in exts:
                        log(__name__, "=== Found subtitle file %s" % file)
                        subtitle_list.append(file)
        episode_pattern = None
        if episode != '':
            episode_pattern = re.compile(get_episode_pattern(episode), re.IGNORECASE)
        # Sweep extracted sub-directories, then the temp dir root, applying
        # the episode filter to each candidate file.
        log(__name__, "Checking temp dir subfolders for subtitle files...")
        for dir in xbmcvfs.listdir(tempdir)[0]:
            log(__name__, "Check dir subfolder %s" % dir)
            for file in xbmcvfs.listdir(os.path.join(tempdir, dir))[1]:
                log(__name__, "Check dir subfolder file %s" % file)
                if os.path.splitext(file)[1] in exts:
                    log(__name__, 'match '+episode+' '+file)
                    if episode_pattern and not episode_pattern.search(file):
                        continue
                    log(__name__, "=== returning subtitle file %s" % file)
                    subtitle_list.append(os.path.join(tempdir, dir, file))
        log(__name__, "Checking temp dir for subtitle files...")
        for file in xbmcvfs.listdir(tempdir)[1]:
            log(__name__, "Check dir file %s" % file)
            if os.path.splitext(file)[1] in exts:
                log(__name__, 'match '+episode+' '+file)
                if episode_pattern and not episode_pattern.search(file):
                    continue
                log(__name__, "=== returning subtitle file %s" % file)
                subtitle_list.append(os.path.join(tempdir, file))
    if len(subtitle_list) == 0:
        # User notification: 32002 = episode not found in pack,
        # 32003 = download contained no subtitle file.
        if sys.version_info.major == 3:
            if episode:
                log(__name__, "=== Could not find matching episode in subtitle pack")
                xbmc.executebuiltin('Notification(%s,%s)' % (__scriptname__, __language__(32002)))
            else:
                log(__name__, "=== Download didn't contain a subtitle file")
                xbmc.executebuiltin('Notification(%s,%s)' % (__scriptname__, __language__(32003)))
        else:
            if episode:
                log(__name__, "=== Could not find matching episode in subtitle pack")
                xbmc.executebuiltin((u'Notification(%s,%s)' % (__scriptname__, __language__(32002))).encode('utf-8'))
            else:
                log(__name__, "=== Download didn't contain a subtitle file")
                xbmc.executebuiltin((u'Notification(%s,%s)' % (__scriptname__, __language__(32003))).encode('utf-8'))
    return subtitle_list
def search_movie(title, year, languages, filename): title = prepare_search_string(title) log(__name__, "Search movie = %s" % title) url = main_url + "/subtitles/title?q=" + urllib.quote_plus(title) + '&r=true' content, response_url = geturl(url) if content is not None: log(__name__, "Multiple movies found, searching for the right one ...") subspage_url = find_movie(content, title, year) if subspage_url is not None: log(__name__, "Movie found in list, getting subs ...") url = main_url + subspage_url getallsubs(url, languages, filename) else: log(__name__, "Movie not found in list: %s" % title) if string.find(string.lower(title), "&") > -1: title = string.replace(title, "&", "and") log(__name__, "Trying searching with replacing '&' to 'and': %s" % title) subspage_url = find_movie(content, title, year) if subspage_url is not None: log(__name__, "Movie found in list, getting subs ...") url = main_url + subspage_url getallsubs(url, languages, filename) else: log(__name__, "Movie not found in list: %s" % title)
def getallsubs(url, allowed_languages, filename="", episode=""):
    """Scrape every subtitle row from a subscene listing page and feed the
    surviving candidates to append_subtitle().

    url:               subscene listing page to scrape.
    allowed_languages: iterable of 3-letter language codes to keep.
    filename:          media file name; an exact (case-insensitive) match
                       marks a subtitle as 'sync'.
    episode:           optional episode tag; when set, only rows matching the
                       episode pattern (or multi-file season packs) are kept.
    Returns nothing; results are emitted via append_subtitle().
    """
    # Row regex over the listing table: link, quality icon, language,
    # display filename, file count, hearing-impaired class and comment cell.
    subtitle_pattern = ("<td class=\"a1\">\s+<a href=\"(?P<link>/subtitles/[^\"]+)\">\s+"
                        "<span class=\"[^\"]+ (?P<quality>\w+-icon)\">\s+(?P<language>[^\r\n\t]+)\s+</span>\s+"
                        "<span>\s+(?P<filename>[^\r\n\t]+)\s+</span>\s+"
                        "</a>\s+</td>\s+"
                        "<td class=\"[^\"]+\">\s+(?P<numfiles>[^\r\n\t]*)\s+</td>\s+"
                        "<td class=\"(?P<hiclass>[^\"]+)\">"
                        "(?:.*?)<td class=\"a6\">\s+<div>\s+(?P<comment>[^\"]+) \s*</div>")
    codes = get_language_codes(allowed_languages)
    if len(codes) < 1:
        # No usable language selection — tell the user and bail out.
        xbmc.executebuiltin((u'Notification(%s,%s)' % (__scriptname__, __language__(32004))).encode('utf-8'))
        return
    log(__name__, 'LanguageFilter='+','.join(codes))
    content, response_url = geturl(url, 'LanguageFilter='+','.join(codes))
    if content is None:
        return
    subtitles = []
    h = HTMLParser.HTMLParser()
    episode_regex = None
    if episode != "":
        episode_regex = re.compile(get_episode_pattern(episode), re.IGNORECASE)
        log(__name__, "regex: %s" % get_episode_pattern(episode))
    for matches in re.finditer(subtitle_pattern, content, re.IGNORECASE | re.DOTALL):
        numfiles = 1
        if matches.group('numfiles') != "":
            numfiles = int(matches.group('numfiles'))
        languagefound = matches.group('language')
        # Use .get() instead of indexing: a site language missing from the
        # subscene_languages table previously raised KeyError and aborted the
        # entire scrape. Unknown languages are now skipped, matching the
        # behaviour of the google-edition variant of this function.
        language_info = subscene_languages.get(languagefound)
        if language_info and language_info['3let'] in allowed_languages:
            link = main_url + matches.group('link')
            subtitle_name = string.strip(matches.group('filename'))
            # "a41" marks the hearing-impaired column class.
            hearing_imp = (matches.group('hiclass') == "a41")
            rating = '0'
            if matches.group('quality') == "bad-icon":
                continue  # skip subtitles rated bad by the site
            if matches.group('quality') == "positive-icon":
                rating = '5'
            comment = re.sub("[\r\n\t]+", " ", h.unescape(string.strip(matches.group('comment'))))
            sync = False
            if filename != "" and string.lower(filename) == string.lower(subtitle_name):
                sync = True
            if episode != "":
                # log(__name__, "match: "+subtitle_name)
                if episode_regex.search(subtitle_name):
                    # Direct episode match.
                    subtitles.append({'rating': rating, 'filename': subtitle_name, 'sync': sync, 'link': link,
                                      'lang': language_info, 'hearing_imp': hearing_imp, 'comment': comment})
                elif numfiles > 2:
                    # Season pack: keep it and tag the wanted episode so the
                    # downloader can filter inside the archive.
                    subtitle_name = subtitle_name + ' ' + (__language__(32001) % int(matches.group('numfiles')))
                    subtitles.append({'rating': rating, 'filename': subtitle_name, 'sync': sync, 'link': link,
                                      'lang': language_info, 'hearing_imp': hearing_imp, 'comment': comment,
                                      'episode': episode})
            else:
                subtitles.append({'rating': rating, 'filename': subtitle_name, 'sync': sync, 'link': link,
                                  'lang': language_info, 'hearing_imp': hearing_imp, 'comment': comment})
    # Synced subs first, then the preferred language.
    subtitles.sort(key=lambda x: [not x['sync'], not x['lang']['name'] == PreferredSub])
    for s in subtitles:
        append_subtitle(s)
def search_movie_google_edition(title, year, languages, filename): title = prepare_search_string(title) log(__name__, "Search movie = %s" % title) url = "https://www.google.com/search?q=subscene.com+" + urllib.quote_plus(title) content, response_url = geturl(url) if content is not None: #log(__name__, "Multiple movies found, searching for the right one ...") subspage_url = find_movie_google_edition(content, title, year) if subspage_url is not None: log(__name__, "Movie found in list, getting subs ...") url = subspage_url getallsubs(url, languages, filename) else: log(__name__, "Movie not found in list: %s" % title) if string.find(string.lower(title), "&") > -1: title = string.replace(title, "&", "and") log(__name__, "Trying searching with replacing '&' to 'and': %s" % title) subspage_url = find_movie_google_edition(content, title, year) if subspage_url is not None: log(__name__, "Movie found in list, getting subs ...") url = subspage_url getallsubs(url, languages, filename) else: log(__name__, "Movie not found in list: %s" % title)
def getallsubs(url, allowed_languages, filename="", episode=""):
    # Google-edition variant of getallsubs(): scrape all subtitle rows from a
    # subscene listing page and emit candidates via append_subtitle().
    #
    # url:               subscene listing page.
    # allowed_languages: 3-letter language codes to keep.
    # filename:          media file name; exact case-insensitive match => sync.
    # episode:           optional episode tag; filters rows to the matching
    #                    episode, multi-file packs, or episode-less names.
    # Returns nothing.
    #
    # Row regex over the listing table: link, quality icon, language, display
    # filename, file count, hearing-impaired class and comment cell.
    subtitle_pattern = ("<td class=\"a1\">\s+<a href=\"(?P<link>/subtitles/[^\"]+)\">\s+"
                        "<span class=\"[^\"]+ (?P<quality>\w+-icon)\">\s+(?P<language>[^\r\n\t]+)\s+</span>\s+"
                        "<span>\s+(?P<filename>[^\r\n\t]+)\s+</span>\s+"
                        "</a>\s+</td>\s+"
                        "<td class=\"[^\"]+\">\s+(?P<numfiles>[^\r\n\t]*)\s+</td>\s+"
                        "<td class=\"(?P<hiclass>[^\"]+)\">"
                        "(?:.*?)<td class=\"a6\">\s+<div>\s+(?P<comment>[^\"]+) \s*</div>")
    codes = get_language_codes(allowed_languages)
    if len(codes) < 1:
        _xbmc_notification(32004)
        return
    log(__name__, 'LanguageFilter='+','.join(codes))
    # Language filter cookie disabled in this variant; full page is fetched.
    #content, response_url = geturl(url, 'LanguageFilter='+','.join(codes))
    content, response_url = geturl(url)
    if content is None:
        log(__name__, 'response empty')
        return
    subtitles = []
    h = HTMLParser.HTMLParser()
    episode_regex = None
    any_episode_regex = None
    if episode != "":
        # Exact pattern for the wanted episode, plus a generic SxxEyy / NxNN
        # detector used to keep names that carry no episode tag at all.
        episode_regex = re.compile(get_episode_pattern(episode), re.IGNORECASE)
        any_episode_regex = re.compile("(?:s[0-9]{2}e[0-9]{2}|\D[0-9]{1,2}x[0-9]{2})", re.IGNORECASE)
        log(__name__, "regex: %s" % get_episode_pattern(episode))
    for matches in re.finditer(subtitle_pattern, content, re.IGNORECASE | re.DOTALL):
        log(__name__, "Found subtitle: %s" % matches.groupdict())
        numfiles = -1
        if matches.group('numfiles') != "":
            numfiles = int(matches.group('numfiles'))
        languagefound = matches.group('language')
        language_info = None
        # Unknown site languages are skipped instead of raising KeyError.
        if languagefound in subscene_languages:
            language_info = subscene_languages[languagefound]
        else:
            log(__name__, "not in subscene_languages: %s" % languagefound)
            continue
        log(__name__, "language_info: %s, language_info['3let']: %s, allowed_languages: %s" % (language_info, language_info['3let'], allowed_languages))
        if language_info is not None and language_info['3let'] in allowed_languages:
            link = main_url + matches.group('link')
            subtitle_name = string.strip(matches.group('filename'))
            # "a41" marks the hearing-impaired column class.
            hearing_imp = (matches.group('hiclass') == "a41")
            rating = '0'
            if matches.group('quality') == "bad-icon":
                # Skip subtitles the site rates as bad.
                continue
            if matches.group('quality') == "positive-icon":
                rating = '5'
            comment = re.sub("[\r\n\t]+", " ", h.unescape(string.strip(matches.group('comment'))))
            sync = False
            if filename != "" and string.lower(filename) == string.lower(subtitle_name):
                sync = True
            if episode != "":
                # log(__name__, "match: "+subtitle_name)
                # matching episode
                if episode_regex.search(subtitle_name):
                    subtitles.append({'rating': rating, 'filename': subtitle_name, 'sync': sync, 'link': link,
                                      'lang': language_info, 'hearing_imp': hearing_imp, 'comment': comment})
                # multiple files
                elif numfiles > 2:
                    subtitle_name = subtitle_name + ' ' + (_xmbc_localized_string_utf8(32001) % int(matches.group('numfiles')))
                    subtitles.append({'rating': rating, 'filename': subtitle_name, 'sync': sync, 'link': link,
                                      'lang': language_info, 'hearing_imp': hearing_imp, 'comment': comment,
                                      'episode': episode})
                # not matching any episode (?)
                elif not any_episode_regex.search(subtitle_name):
                    subtitles.append({'rating': rating, 'filename': subtitle_name, 'sync': sync, 'link': link,
                                      'lang': language_info, 'hearing_imp': hearing_imp, 'comment': comment,
                                      'episode': episode})
            else:
                subtitles.append({'rating': rating, 'filename': subtitle_name, 'sync': sync, 'link': link,
                                  'lang': language_info, 'hearing_imp': hearing_imp, 'comment': comment})
    # Synced subs first, then the preferred language.
    subtitles.sort(key=lambda x: [not x['sync'], not x['lang']['name'] == PreferredSub])
    log(__name__, "subtitles count: %s" % len(subtitles))
    for s in subtitles:
        append_subtitle(s)
def find_tv_show_season_google_edition(content, tvshow, season, year): url_found = None found_urls = [] possible_matches = [] all_tvshows = [] search_result_url_pattern = "(?P<url>https:\/\/subscene\.com\/subtitles\/.+?)[\"/&<?]" tvshow_slug = string.lower(tvshow).replace(" ", "-") h = HTMLParser.HTMLParser() for matches in re.finditer(search_result_url_pattern, content, re.IGNORECASE): found_url = matches.group('url') if found_url in found_urls: continue log(__name__, "Found match on search page: %s" % found_url) found_slug = string.lower(found_url.split("/")[-1]) found_urls.append(found_url) score = difflib.SequenceMatcher(None, found_slug, tvshow_slug).ratio() + difflib.SequenceMatcher(None, found_slug, tvshow_slug + "-" + year).ratio() all_tvshows.append([score, found_url]) # try to find match on title log(__name__, "Trying to match on title: (%s) and (%s)" % (found_slug, tvshow_slug)) if string.find(found_slug, tvshow_slug) > -1: # try to match season if string.find(string.lower(found_slug), string.lower(season)) > -1: log(__name__, "Matching tv show season found on search page: %s" % found_url) possible_matches.append([score, found_url]) # try to match with season if first season (ie one season only series) elif string.lower(season) == "first" and string.find(string.lower(found_url), "season") == -1: log(__name__, "Matching tv show (no season) found on search page: %s" % found_url) possible_matches.append([score, found_url]) if len(possible_matches) > 0: possible_matches = sorted(possible_matches, key=lambda x: -int(x[0])) url_found = possible_matches[0][1] log(__name__, "Selecting matching tv show with highest fuzzy string score: %s (%s)" % ( possible_matches[0][0], possible_matches[0][1])) else: if len(all_tvshows) > 0: all_tvshows = sorted(all_tvshows, key=lambda x: -int(x[0])) url_found = all_tvshows[0][1] log(__name__, "Selecting possible tv show with highest fuzzy string score: %s (score: %s)" % ( all_tvshows[0][0], all_tvshows[0][1])) return url_found
def find_movie_google_edition(content, title, year):
    """Return the first subscene movie-page URL found in Google results HTML.

    The pattern matches movie pages such as
    https://subscene.com/subtitles/13-going-on-30 and its trailing
    delimiter class stops it from swallowing sub-pages like
    .../english/516887. `title` and `year` are currently unused — the
    first hit is trusted (behavior kept from the original, which always
    returned on the first iteration).

    content: raw Google search results HTML.
    Returns the matched URL, or None when no subscene URL is present.
    """
    # Dead commented-out scoring/priority code and the unused dedup list,
    # HTMLParser instance and found_movies accumulator were removed; they
    # could never execute because the loop returned on its first match.
    search_result_url_pattern = "(?P<url>https:\/\/subscene\.com\/subtitles\/.+?)[\"/&<?]"
    for matches in re.finditer(search_result_url_pattern, content, re.IGNORECASE):
        found_url = matches.group('url')
        log(__name__, "Found match: " + found_url)
        return found_url
    return None
def download(link, search_string=""):
    # Legacy download variant using urllib.FancyURLopener and a shared
    # __temp__ directory (which is wiped on every call — not safe for
    # concurrent downloads).
    #
    # link:          URL of the subscene subtitle detail page.
    # search_string: optional substring; when set, only files whose name
    #                contains it (case-insensitive) are returned.
    # Returns: list of local subtitle file paths (possibly empty).
    subtitle_list = []
    exts = [".srt", ".sub", ".txt", ".smi", ".ssa", ".ass"]
    # Scrape the POST-back download link out of the detail page HTML.
    downloadlink_pattern = "...<a href=\"(.+?)\" rel=\"nofollow\" onclick=\"DownloadSubtitle"
    content, response_url = geturl(link)
    match = re.compile(downloadlink_pattern).findall(content)
    if match:
        downloadlink = main_url + match[0]
        # Dummy ASP.NET post-back fields expected by the download handler.
        viewstate = 0
        previouspage = 0
        subtitleid = 0
        typeid = "zip"
        filmid = 0
        postparams = urllib.urlencode(
            {'__EVENTTARGET': 's$lc$bcr$downloadLink',
             '__EVENTARGUMENT': '',
             '__VIEWSTATE': viewstate,
             '__PREVIOUSPAGE': previouspage,
             'subtitleId': subtitleid,
             'typeId': typeid,
             'filmId': filmid})

        # Opener subclass only overrides the User-Agent string.
        class MyOpener(urllib.FancyURLopener):
            version = ("User-Agent=Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3) "
                       "Gecko/20100401 Firefox/3.6.3 ( .NET CLR 3.5.30729)")
        my_urlopener = MyOpener()
        # Referer header is required or the site refuses the download.
        my_urlopener.addheader('Referer', link)
        log(__name__, "Fetching subtitles using url '%s' with referer header '%s' and post parameters '%s'" % (
            downloadlink, link, postparams))
        response = my_urlopener.open(downloadlink, postparams)
        # Recreate a clean shared temp dir for this download.
        if xbmcvfs.exists(__temp__):
            shutil.rmtree(__temp__)
        xbmcvfs.mkdirs(__temp__)
        # Neutral extension; renamed once the archive type is sniffed.
        local_tmp_file = os.path.join(__temp__, "subscene.xxx")
        packed = False
        try:
            log(__name__, "Saving subtitles to '%s'" % local_tmp_file)
            local_file_handle = open(local_tmp_file, "wb")
            local_file_handle.write(response.read())
            local_file_handle.close()
            #Check archive type (rar/zip/else) through the file header (rar=Rar!, zip=PK)
            myfile = open(local_tmp_file, "rb")
            myfile.seek(0)
            if myfile.read(1) == 'R':
                typeid = "rar"
                packed = True
                log(__name__, "Discovered RAR Archive")
            else:
                myfile.seek(0)
                if myfile.read(1) == 'P':
                    typeid = "zip"
                    packed = True
                    log(__name__, "Discovered ZIP Archive")
                else:
                    typeid = "srt"
                    packed = False
                    log(__name__, "Discovered a non-archive file")
            myfile.close()
            local_tmp_file = os.path.join(__temp__, "subscene." + typeid)
            os.rename(os.path.join(__temp__, "subscene.xxx"), local_tmp_file)
            log(__name__, "Saving to %s" % local_tmp_file)
        except:
            # NOTE(review): bare except — failures here are logged only.
            log(__name__, "Failed to save subtitle to %s" % local_tmp_file)
        if packed:
            # Pause so the handle is released, then let Kodi extract in-place.
            xbmc.sleep(500)
            xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (local_tmp_file, __temp__,)).encode('utf-8'), True)
        for file in xbmcvfs.listdir(__temp__)[1]:
            file = os.path.join(__temp__, file)
            if os.path.splitext(file)[1] in exts:
                # Apply the optional case-insensitive substring filter.
                if search_string and string.find(string.lower(file), string.lower(search_string)) == -1:
                    continue
                log(__name__, "=== returning subtitle file %s" % file)
                subtitle_list.append(file)
    if len(subtitle_list) == 0:
        # 32002 = no file matched the search string, 32003 = no subtitle file.
        if search_string:
            xbmc.executebuiltin((u'Notification(%s,%s)' % (__scriptname__, __language__(32002))).encode('utf-8'))
        else:
            xbmc.executebuiltin((u'Notification(%s,%s)' % (__scriptname__, __language__(32003))).encode('utf-8'))
    return subtitle_list
def search_manual(searchstr, languages, filename): title, year = xbmc.getCleanMovieTitle(searchstr) log(__name__, "Search movie = %s" % title) url = main_url + "/subtitles/searching?q=" + urllib.quote_plus( title) + '&r=true' content, resp = geturl(url) if content is not None: log(__name__, "Multiple movies found, searching for the right one ...") subspage_url = find_movie(content, title, year) if subspage_url is not None: log(__name__, "Movie found in list, getting subs ...") url = main_url + subspage_url getallsubs(url, languages, filename) else: log(__name__, "Movie not found in list: %s" % title) if string.find(string.lower(title), "&") > -1: title = string.replace(title, "&", "and") log(__name__, "Trying searching with replacing '&' to 'and': %s" % title) subspage_url = find_movie(content, title, year) if subspage_url is not None: log(__name__, "Movie found in list, getting subs ...") url = main_url + subspage_url getallsubs(url, languages, filename) else: log(__name__, "Movie not found in list: %s" % title) else: mess(u'Lỗi khi tìm với Subscene, thử với OpenSubtitles') OSSearch(item)
def download(link, episode=""):
    # Proxy-based download variant: scrapes the download link from the detail
    # page, then fetches the archive through a Google Cloud Function relay
    # instead of hitting subscene directly, unpacks it and returns local
    # subtitle file paths.
    #
    # link:    URL of the subscene subtitle detail page.
    # episode: optional episode tag used to filter files out of season packs.
    # Returns: list of local subtitle file paths (possibly empty).
    log(__name__, "Download Subscene")
    subtitle_list = []
    exts = [".srt", ".sub", ".txt", ".smi", ".ssa", ".ass"]
    downloadlink_pattern = "...<a href=\"(.+?)\" rel=\"nofollow\" onclick=\"DownloadSubtitle"
    # Unique per-download temp dir. NOTE(review): unicode() is Python 2 only.
    uid = uuid.uuid4()
    tempdir = os.path.join(TEMP, unicode(uid))
    xbmcvfs.mkdirs(tempdir)
    content, resp = geturl(link)
    match = re.compile(downloadlink_pattern).findall(content)
    if match:
        downloadlink = main_url + match[0]
        # Legacy ASP.NET post-back fields; unused by the proxy request below
        # but kept from the original flow (typeid is reused for the rename).
        viewstate = 0
        previouspage = 0
        subtitleid = 0
        typeid = "zip"
        filmid = 0
        # The relay service downloads the archive server-side given the URL.
        jsondata = {"url": downloadlink}
        data = json.dumps(jsondata)
        request = urllib2.Request(
            "https://us-central1-kodi-thuongtin.cloudfunctions.net/subscene2",
            data, {
                'Content-Type': 'application/json',
                'Content-Length': len(data)
            })
        response = urllib2.urlopen(request)
        if response.getcode() != 200:
            log(__name__, "Failed to download subtitle file")
            return subtitle_list
        # Neutral extension; renamed once the archive type is sniffed.
        local_tmp_file = os.path.join(tempdir, "subscene.xxx")
        packed = False
        try:
            log(__name__, "Saving subtitles to '%s'" % local_tmp_file)
            local_file_handle = xbmcvfs.File(local_tmp_file, "wb")
            local_file_handle.write(response.read())
            # local_file_handle.write(content)
            local_file_handle.close()
            # Check archive type (rar/zip/else) through the file header (rar=Rar!, zip=PK)
            myfile = xbmcvfs.File(local_tmp_file, "rb")
            myfile.seek(0, 0)
            if myfile.read(1) == 'R':
                typeid = "rar"
                packed = True
                log(__name__, "Discovered RAR Archive")
            else:
                myfile.seek(0, 0)
                if myfile.read(1) == 'P':
                    typeid = "zip"
                    packed = True
                    log(__name__, "Discovered ZIP Archive")
                else:
                    typeid = "srt"
                    packed = False
                    log(__name__, "Discovered a non-archive file")
            myfile.close()
            local_tmp_file = os.path.join(tempdir, "subscene." + typeid)
            xbmcvfs.rename(os.path.join(tempdir, "subscene.xxx"), local_tmp_file)
            log(__name__, "Saving to %s" % local_tmp_file)
        except:
            # NOTE(review): bare except — failures here are logged only.
            log(__name__, "Failed to save subtitle to %s" % local_tmp_file)
        if packed:
            # Pause so the handle is released, then let Kodi extract in-place.
            xbmc.sleep(500)
            xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (
                local_tmp_file,
                tempdir,
            )).encode('utf-8'), True)
        episode_pattern = None
        if episode != '':
            episode_pattern = re.compile(get_episode_pattern(episode), re.IGNORECASE)
        # Collect subtitle files from one level of extracted sub-directories ...
        for dir in xbmcvfs.listdir(tempdir)[0]:
            for file in xbmcvfs.listdir(os.path.join(tempdir, dir))[1]:
                if os.path.splitext(file)[1] in exts:
                    log(__name__, 'match ' + episode + ' ' + file)
                    if episode_pattern and not episode_pattern.search(file):
                        continue
                    log(__name__, "=== returning subtitle file %s" % file)
                    subtitle_list.append(os.path.join(tempdir, dir, file))
        # ... and from the temp dir root itself.
        for file in xbmcvfs.listdir(tempdir)[1]:
            if os.path.splitext(file)[1] in exts:
                log(__name__, 'match ' + episode + ' ' + file)
                if episode_pattern and not episode_pattern.search(file):
                    continue
                log(__name__, "=== returning subtitle file %s" % file)
                subtitle_list.append(os.path.join(tempdir, file))
    if len(subtitle_list) == 0:
        # 32002 = episode not found in pack, 32003 = no subtitle file at all.
        if episode:
            _xbmc_notification(32002)
        else:
            _xbmc_notification(32003)
    return subtitle_list