def get_movie_id(self, movie, year=None):
    """Get the best matching movie id for `movie`, `year`.

    Requests ``search.php?search=<movie>`` below ``self.server_url`` and scans
    the rows of the table with class ``tabel`` for links whose ``href`` has the
    form ``movie/<id>`` and whose text looks like ``<title> (<yyyy>)``. A row
    matches when the lower-cased, sanitized title equals the lower-cased,
    sanitized `movie` and the four-digit year equals ``str(year)``. When
    several rows match, the last one wins.

    :param str movie: movie.
    :param year: year of the movie. The year is always compared, so a `year`
        of ``None`` never matches any row.
    :type year: int
    :return: the id segment of the matching link (a string), or ``None`` when
        no row matched or the result table is missing from the page.
    :rtype: str or None
    :raises: any exception raised by the request itself or by
        ``raise_for_status()`` is propagated unchanged.
    """
    movie_id = None

    # get the movie id
    logger.info('Getting movie id')

    r = self.session.get(self.server_url + 'search.php?search=' + quote_plus(movie), timeout=10)
    r.raise_for_status()

    soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
    try:
        # populate the movie id
        movies_table = soup.find('table', {'class': 'tabel'})
        # find() returns None when the table is absent; treat that as
        # "no results" instead of crashing on None.find_all().
        rows = movies_table.find_all('tr') if movies_table is not None else []

        # Loop-invariant halves of the comparison, computed once.
        wanted_title = sanitize(movie.lower())
        wanted_year = str(year)

        for item in rows:
            link = item.find('a', href=True)
            if not link or not link['href'].startswith('movie/'):
                continue

            # Only 'movie/<id>' is accepted; deeper paths are ignored.
            uri_parts = link['href'].split('/')
            if len(uri_parts) != 2:
                continue

            match = re.search(r'(.+)\s\((\d{4})\)$', link.text)
            if not match:
                continue

            media_name, media_year = match.groups()
            if sanitize(media_name.lower()) == wanted_title and media_year == wanted_year:
                # No break on purpose: a later matching row overrides an
                # earlier one, as before.
                movie_id = uri_parts[1]
    finally:
        # Release the parse tree even if scanning the page raised.
        soup.decompose()

    logger.debug('Found this movie id: %s', movie_id)

    if not movie_id:
        logger.debug("Addic7ed: Cannot find this movie with guessed year %s: %s", year, movie)

    return movie_id
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series from the page at ``self.server_url``.

    The children of the element with id ``qsShow`` are read; each child that
    carries a non-zero integer ``value`` attribute contributes one entry keyed
    by its text, cleaned with ``sanitize`` using ``self.sanitize_characters``.
    When ``series_year_re`` finds a year in the cleaned name, the name without
    the year is added as well, unless that key already exists.

    The current time is stored in ``region`` under
    ``self.last_show_ids_fetch_key`` before the request is sent.

    :return: show id per sanitized series name.
    :rtype: dict
    :raises Exception: if no show id could be extracted, including when the
        ``qsShow`` element is missing from the page.
    """
    # get the show page
    logger.info('Getting show ids')
    region.set(self.last_show_ids_fetch_key, datetime.datetime.now())

    r = self.session.get(self.server_url, timeout=10)
    r.raise_for_status()

    soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

    # populate the show ids
    show_ids = {}
    try:
        shows = soup.find(id='qsShow')
        # find() returns None when the element is absent; iterate nothing so
        # the explicit "No show IDs found!" error below is raised instead of
        # a TypeError.
        for show in (shows if shows is not None else ()):
            # Children without an 'attrs' attribute are skipped.
            if not hasattr(show, 'attrs'):
                continue

            try:
                show_id = int(show.attrs['value'])
            except (KeyError, ValueError):
                # No 'value' attribute, or not an integer: not a usable entry.
                continue

            # An id of 0 is skipped.
            if show_id == 0:
                continue

            show_clean = sanitize(show.text, default_characters=self.sanitize_characters)
            show_ids[show_clean] = show_id

            match = series_year_re.match(show_clean)
            if match and match.group(2) and match.group(1) not in show_ids:
                # year found, also add it without year
                show_ids[match.group(1)] = show_id
    finally:
        # Release the parse tree even if scanning the page raised.
        soup.decompose()

    logger.debug('Found %d show ids', len(show_ids))

    if not show_ids:
        raise Exception("Addic7ed: No show IDs found!")

    return show_ids