def search(self, phrase, subtitle=None, tolerance=None, func=None):
    """
    obj.search(phrase, subtitle=None, tolerance=None, func=None)
                                            -> result generator

    Lazily yield the results of a grabber lookup for 'phrase' whose
    score does not exceed 'tolerance'.

    phrase    -- primary search phrase, sent to the grabber command.
    subtitle  -- optional secondary phrase. When given, the lookup is
                 issued with '-N' and both phrases; otherwise '-M' is
                 used with the primary phrase only.
    tolerance -- highest score a result may have and still be yielded.
                 When None, it is read from the
                 'MetadataLookupTolerance' setting (fallback 5).
    func      -- optional scoring callable invoked as
                 func(phrase, subtitle, result). When omitted, the
                 levenshtein distance is used: against the result's
                 subtitle if 'subtitle' was given, otherwise against
                 'title : subtitle' (or just the title when the result
                 has no subtitle).

    Every result, yielded or not, gets its score stored in its
    'levenshtein' attribute.
    """
    by_subtitle = subtitle is not None

    if not func:
        if by_subtitle:
            def func(p, s, r):
                return levenshtein(r.subtitle, s)
        else:
            def func(p, s, r):
                if r.subtitle is None:
                    return levenshtein(r.title, p)
                return levenshtein('%s : %s' % (r.title, r.subtitle), p)

    if tolerance is None:
        configured = self.db.settings.NULL.get('MetadataLookupTolerance', 5)
        tolerance = int(configured)

    if by_subtitle:
        results = self.command('-N', '"%s" "%s"' % (phrase, subtitle))
    else:
        results = self.command('-M', '"%s"' % phrase)

    for result in results:
        result.levenshtein = func(phrase, subtitle, result)
        if not (result.levenshtein > tolerance):
            yield result
def _name_match_quality(name, tvtitle): distance = levenshtein(name.lower(), tvtitle.lower()) if len(tvtitle) > len(name): match_quality = float(len(tvtitle) - distance) / len(tvtitle) else: match_quality = float(len(name) - distance) / len(name) return match_quality
def _getSeries(self, series, by_id=False):
    """Search for *series* and let a UI object pick one of the results.

    The UI is chosen from the configuration: the 'custom_ui' callable
    when one is configured, otherwise ConsoleUI when 'interactive' is
    set, otherwise BaseUI. When 'sort_series' is enabled, each result
    gets a 'match_similarity' entry (the levenshtein distance between
    the lower-cased search term and the lower-cased 'seriesName') and
    the result list is sorted in place, closest match first, before
    being handed to the UI's selectSeries().

    The *by_id* argument is accepted but not used by this method.
    """
    allSeries = self.search(series)
    config = self.config

    custom_ui = config['custom_ui']
    if custom_ui is not None:
        log().debug("Using custom UI %s" % repr(custom_ui))
        ui = custom_ui(config=config)
    elif config['interactive']:
        log().debug('Interactively selecting show using ConsoleUI')
        ui = ConsoleUI(config=config)
    else:
        log().debug('Auto-selecting first search result using BaseUI')
        ui = BaseUI(config=config)

    if config['sort_series']:
        wanted = series.lower()
        for candidate in allSeries:
            candidate_name = candidate[u'seriesName'].lower()
            candidate[u'match_similarity'] = levenshtein(wanted, candidate_name)

        def similarity(entry):
            return entry[u'match_similarity']

        allSeries.sort(key=similarity)

    return ui.selectSeries(allSeries)
def buildNumbers(args, opts):
    """Find the episode(s) matching a recording and print them as XML.

    Supported invocations (``args`` carries the two positional values)::

        -N <inetref> <subtitle>    e.g. -N 69 "Elizabeth Keen"
        -N <inetref> <date time>   e.g. -N 69 "2021-01-29 19:00:00"
        -N <title> <subtitle>      e.g. -N "The Blacklist" "Elizabeth Keen"
        -N <title> <date time>     e.g. -N "The Blacklist" "2021-01-29 19:00:00"

    args -- sequence whose first item is an inetref or a show title and
            whose second item is an episode subtitle or a timestamp in
            "%Y-%m-%d %H:%M:%S" format.
    opts -- options object; its 'debug' and 'session' attributes are read.

    On success the matching items are written through print_etree() as a
    <metadata> document. An Exception is raised when no episode matches.
    """
    from MythTV.utility import levenshtein
    from MythTV.utility.dt import posixtzinfo
    from MythTV.tvmaze import tvmaze_api as tvmaze
    from MythTV import datetime
    from lxml import etree
    from datetime import timedelta

    def match_quality_of(candidate, wanted, fold_case=False):
        # Ratio (0 - 1) of matching characters in the longer string.
        if fold_case:
            distance = levenshtein(candidate.lower(), wanted.lower())
        else:
            distance = levenshtein(candidate, wanted)
        longest = max(len(candidate), len(wanted))
        if longest == 0:
            # Two empty strings are identical; avoid dividing by zero.
            return 1.0
        return float(longest - distance) / longest

    if opts.debug:
        print("Function 'buildNumbers' called with arguments: " +
              (" ".join("'%s'" % i for i in args)))

    # set the session
    if opts.session:
        tvmaze.set_session(opts.session)

    dtInLocalZone = None

    # ToDo:
    # below check shows a deficiency of the MythTV grabber API itself:
    # TV-Shows or Movies with an integer as title are not recognized correctly.
    # see https://www.mythtv.org/wiki/MythTV_Universal_Metadata_Format
    # and https://code.mythtv.org/trac/ticket/11850
    try:
        inetref = int(args[0])
        tvsubtitle = args[1]
        inetrefList = [inetref]

    except ValueError:
        tvtitle = args[0]
        tvsubtitle = args[1]
        inetrefList = []  # inetrefs for shows with title matches
        best_show_quality = 0.5  # require at least this quality on string match

        showlist = tvmaze.search_show(tvtitle)

        # It's problematic to make decisions solely upon the Levenshtein distance.
        # If the strings are really long or really short, a simple rule, such as
        # "accept any distance < 6" can provide misleading results.
        # To establish a more useful measurement, we'll use the Levenshtein
        # distance to figure out the ratio (0 - 1) of matching characters in the
        # longer string, and call this 'match_quality'.
        #     "Risk", "Call" -> distance = 4
        #           match_quality = (4 - 4) / 4 = 0
        #     "In Sickness and in Health", "Sickness and Health" -> distance = 6
        #           match_quality = (25 - 6)/25 = .76

        for show_info in showlist:
            try:
                inetref = int(show_info.id)
                match_quality = match_quality_of(show_info.name, tvtitle,
                                                 fold_case=True)
                if match_quality >= best_show_quality:
                    if match_quality == best_show_quality:
                        inetrefList.append(inetref)
                    else:
                        # Any items previously appended for a lesser match
                        # need to be eliminated
                        inetrefList = [inetref]
                        best_show_quality = match_quality
            except (TypeError, ValueError):
                # Best effort: skip search results that cannot be evaluated.
                continue

    # check whether the 'subtitle' is really a timestamp
    try:
        dtInLocalZone = datetime.strptime(
            tvsubtitle, "%Y-%m-%d %H:%M:%S")  # defaults to local timezone
    except ValueError:
        dtInLocalZone = None

    # matchesFound counts the items currently held in 'tree'.
    matchesFound = 0
    best_ep_quality = 0.5  # require at least this quality on string match
    tree = etree.XML(u'<metadata></metadata>')
    for inetref in inetrefList:
        dtInTgtZone = None
        if dtInLocalZone:
            try:
                show_info = tvmaze.get_show(inetref)
                # Some cases have 'network' = None, but webChannel != None. If we
                # find such a case, we'll set show_network to the webChannel.
                show_network = show_info.network
                if show_network is None:
                    show_network = show_info.streaming_service
                show_country = show_network.get('country')
                show_tz = show_country.get('timezone')
                dtInTgtZone = dtInLocalZone.astimezone(posixtzinfo(show_tz))
            except (ValueError, AttributeError):
                # No usable timezone for this show; the subtitle-based
                # lookup below is used for it instead.
                dtInTgtZone = None

        if dtInTgtZone:
            # get episode info based on inetref and datetime in target zone
            try:
                episodes = tvmaze.get_show_episodes_by_date(
                    inetref, dtInTgtZone)
            except SystemExit:
                episodes = []
            time_match_list = []
            early_match_list = []
            minTimeDelta = timedelta(minutes=60)
            # show_tz is always bound here: dtInTgtZone is only set after
            # show_tz was assigned in the try block above.
            tgt_tzinfo = posixtzinfo(show_tz)
            for i, ep in enumerate(episodes):
                epInTgtZone = datetime.fromIso(ep.timestamp, tz=tgt_tzinfo)
                durationDelta = timedelta(minutes=ep.duration)

                # Consider it a match if the recording starts late, but
                # within the duration of the show.
                if epInTgtZone <= dtInTgtZone < epInTgtZone + durationDelta:
                    # Recording start time is within the range of this episode
                    if opts.debug:
                        print('Recording in range of inetref %d, season %d, episode %d (%s ... %s)' \
                              % (inetref, ep.season, ep.number, epInTgtZone, epInTgtZone+durationDelta))
                    time_match_list.append(i)
                    minTimeDelta = timedelta(minutes=0)
                # Consider it a match if the recording is a little bit early. This helps cases
                # where you set up a rule to record, at say 9:00, and the broadcaster uses a
                # slightly odd start time, like 9:05.
                elif epInTgtZone - minTimeDelta <= dtInTgtZone < epInTgtZone:
                    # Recording started earlier than this episode, so see
                    # if it's the closest match
                    if epInTgtZone - dtInTgtZone == minTimeDelta:
                        if opts.debug:
                            print('adding episode to closest list',
                                  epInTgtZone - dtInTgtZone, '\n')
                        early_match_list.append(i)
                    elif epInTgtZone - dtInTgtZone < minTimeDelta:
                        if opts.debug:
                            print('this episode is new closest',
                                  epInTgtZone - dtInTgtZone, '\n')
                        minTimeDelta = epInTgtZone - dtInTgtZone
                        early_match_list = [i]

            # No exact matches found, so use the list of the closest episode(s)
            for ep_index in (time_match_list or early_match_list):
                season_nr = str(episodes[ep_index].season)
                episode_id = episodes[ep_index].id
                item = buildSingleItem(inetref, season_nr, episode_id)
                if item is not None:
                    tree.append(item.toXML())
                    matchesFound += 1
        else:
            # get episode based on subtitle
            episodes = tvmaze.get_show_episode_list(inetref)

            min_dist_list = []
            for i, ep in enumerate(episodes):
                match_quality = match_quality_of(ep.name, tvsubtitle)
                if match_quality >= best_ep_quality:
                    if match_quality == best_ep_quality:
                        min_dist_list.append(i)
                        if opts.debug:
                            print(
                                '"%s" added to best list, match_quality = %g'
                                % (ep.name, match_quality))
                    else:
                        # Any items previously appended for a lesser match
                        # need to be eliminated. The counter is reset along
                        # with the tree so that it keeps describing the
                        # tree's content.
                        tree = etree.XML(u'<metadata></metadata>')
                        matchesFound = 0
                        min_dist_list = [i]
                        best_ep_quality = match_quality
                        if opts.debug:
                            print('"%s" is new best match_quality = %g' %
                                  (ep.name, match_quality))

            # The list is constructed in order of oldest season to newest.
            # If episodes with equivalent match quality show up in multiple
            # seasons, we want to list the most recent first. To accomplish
            # this, we'll process items starting at the end of the list, and
            # proceed to the beginning.
            for ep_index in reversed(min_dist_list):
                season_nr = str(episodes[ep_index].season)
                episode_id = episodes[ep_index].id
                if opts.debug:
                    episode_nr = str(episodes[ep_index].number)
                    print("tvmaze.get_show_episode_list(%s) returned :" %
                          inetref)
                    print("with season : %s and episode %s" %
                          (season_nr, episode_nr))
                    print(
                        "Chosen episode index '%d' based on match quality %g"
                        % (ep_index, best_ep_quality))

                # we have now inetref, season, episode_id
                item = buildSingleItem(inetref, season_nr, episode_id)
                if item is not None:
                    tree.append(item.toXML())
                    matchesFound += 1

    if matchesFound > 0:
        print_etree(
            etree.tostring(tree,
                           encoding='UTF-8',
                           pretty_print=True,
                           xml_declaration=True))
    else:
        if dtInLocalZone:
            raise Exception(
                "Cannot find any episode with timestamp matching '%s'." %
                tvsubtitle)
        else:
            # tvmaze.py -N 4711 "Episode 42"
            raise Exception("Cannot find any episode with subtitle '%s'." %
                            tvsubtitle)