def search_titles(self, titles, season, title_year, imdb_id):
    """Search for titles matching any of the given `titles`.

    For episodes, each season has its own title.

    Each candidate title is queried in its sanitized form. When it contains
    an apostrophe or a dot, it is queried a second time, sanitized with
    those characters passed as ``ignore_characters``.

    :param titles: the titles to search for.
    :type titles: list of str
    :param int season: season of the title (falsy when searching a movie).
    :param int title_year: year of the title.
    :param imdb_id: IMDB id, forwarded unchanged to ``is_valid_title``.
    :return: found titles, keyed by the integer ``id_filme`` of each result.
    :rtype: dict

    """
    titles_found = {}
    # loop-invariant: characters that trigger the second, alternate query
    ignore_characters = {'\'', '.'}

    for searched_title in titles:
        sanitized_titles = [sanitize(searched_title)]
        if any(c in searched_title for c in ignore_characters):
            sanitized_titles.append(sanitize(searched_title, ignore_characters=ignore_characters))

        for sanitized_title in sanitized_titles:
            # make the query
            if season:
                logger.info('Searching episode title %r for season %r', sanitized_title, season)
            else:
                logger.info('Searching movie title %r', sanitized_title)

            r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10)
            raise_for_status(r)
            results = json.loads(r.text)

            # loop over results
            for result in results:
                source = result['_source']

                # extract id
                title_id = int(source['id_filme'])

                # extract type; a distinct name keeps the searched title intact
                found = {'type': type_map[source['tipo']], 'title2': None, 'imdb_id': None}

                # extract title and year (the third group, country, is not used)
                name, year, _country = title_re.match(source['dsc_nome']).groups()
                found['title'] = name

                # the localized name is optional
                if 'dsc_nome_br' in source:
                    found['title2'] = title_re.match(source['dsc_nome_br']).groups()[0]

                # extract imdb_id ('0' means the site has no IMDB id for it)
                if source['id_imdb'] != '0':
                    if not source['id_imdb'].startswith('tt'):
                        found['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
                    else:
                        found['imdb_id'] = source['id_imdb']

                # extract season
                if found['type'] == 'episode':
                    if source['temporada'] and source['temporada'].isdigit():
                        found['season'] = int(source['temporada'])
                    else:
                        # 'dsc_nome_br' may be absent (see above); indexing it
                        # directly would raise KeyError and abort the search
                        match = season_re.search(source.get('dsc_nome_br') or '')
                        if match:
                            found['season'] = int(match.group('season'))
                        else:
                            logger.debug('No season detected for title %d (%s)', title_id, name)

                # extract year
                if year:
                    found['year'] = int(year)
                elif source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
                    # year is based on season air date hence the adjustment
                    found['year'] = int(source['dsc_data_lancamento']) - found.get('season', 1) + 1

                # add title only if is valid
                # Check against title without ignored chars
                if self.is_valid_title(found, title_id, sanitized_titles[0], season, title_year, imdb_id):
                    logger.debug(u'Found title: %s', found)
                    titles_found[title_id] = found

    logger.debug('Found %d titles', len(titles_found))

    return titles_found
def search_titles(self, title, season, title_year, imdb_id):
    """Search for titles matching the `title`.

    For episodes, each season has its own title.

    The title is queried in its sanitized form. When it contains an
    apostrophe or a dot, it is queried a second time, sanitized with those
    characters passed as ``ignore_characters``.

    :param str title: the title to search for.
    :param int season: season of the title (falsy when searching a movie).
    :param int title_year: year of the title.
    :param imdb_id: IMDB id, forwarded unchanged to ``is_valid_title``.
    :return: found titles, keyed by the integer ``id_filme`` of each result.
    :rtype: dict

    """
    titles = {}

    sanitized_titles = [sanitize(title)]
    ignore_characters = {'\'', '.'}
    if any(c in title for c in ignore_characters):
        sanitized_titles.append(sanitize(title, ignore_characters=ignore_characters))

    for sanitized_title in sanitized_titles:
        # make the query
        if season:
            logger.info('Searching episode title %r for season %r', sanitized_title, season)
        else:
            logger.info('Searching movie title %r', sanitized_title)

        r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10)
        raise_for_status(r)
        results = json.loads(r.text)

        # loop over results
        for result in results:
            source = result['_source']

            # extract id
            title_id = int(source['id_filme'])

            # extract type; a distinct name avoids rebinding the `title` parameter
            found = {'type': type_map[source['tipo']], 'title2': None, 'imdb_id': None}

            # extract title and year (the third group, country, is not used)
            name, year, _country = title_re.match(source['dsc_nome']).groups()
            found['title'] = name

            # the localized name is optional
            if 'dsc_nome_br' in source:
                found['title2'] = title_re.match(source['dsc_nome_br']).groups()[0]

            # extract imdb_id ('0' means the site has no IMDB id for it)
            if source['id_imdb'] != '0':
                if not source['id_imdb'].startswith('tt'):
                    found['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
                else:
                    found['imdb_id'] = source['id_imdb']

            # extract season
            if found['type'] == 'episode':
                if source['temporada'] and source['temporada'].isdigit():
                    found['season'] = int(source['temporada'])
                else:
                    # 'dsc_nome_br' may be absent (see above); indexing it
                    # directly would raise KeyError and abort the search
                    match = season_re.search(source.get('dsc_nome_br') or '')
                    if match:
                        found['season'] = int(match.group('season'))
                    else:
                        logger.debug('No season detected for title %d (%s)', title_id, name)

            # extract year
            if year:
                found['year'] = int(year)
            elif source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
                # year is based on season air date hence the adjustment
                found['year'] = int(source['dsc_data_lancamento']) - found.get('season', 1) + 1

            # add title only if is valid
            # Check against title without ignored chars
            if self.is_valid_title(found, title_id, sanitized_titles[0], season, title_year, imdb_id):
                logger.debug(u'Found title: %s', found)
                titles[title_id] = found

    logger.debug('Found %d titles', len(titles))

    return titles