def get_details_of_one_compilation(self, idfa_data, url):
    """Load the page of one combination (compilation) program and parse it.

    The page HTML is taken from the local cache file when the url is already
    known in idfa_data.film_id_by_url; otherwise it is downloaded.  The parsed
    film (or None on failure) is returned, and its screenings are parsed as a
    side effect.

    Args:
        idfa_data: festival data object holding film_id_by_url and films.
        url: web address of the combination program page.

    Returns:
        The film object produced by CompilationPageParser, or None when the
        page could not be obtained or parsed.
    """
    compilation_data = None
    film = None
    # Track the film id explicitly; it is only known for cached urls.
    filmid = None
    if url in idfa_data.film_id_by_url.keys():
        filmid = idfa_data.film_id_by_url[url]
        film_file = film_file_format.format(filmid)
        if os.path.isfile(film_file):
            charset = web_tools.get_charset(film_file)
            with open(film_file, 'r', encoding=charset) as f:
                compilation_data = f.read()
    if compilation_data is None:
        print(f'Downloading site of combination program: {url}')
        url_reader = web_tools.UrlReader(Globals.error_collector)
        compilation_data = url_reader.read_url(url)
    if compilation_data is not None:
        print(f'Parsing FILM INFO from: {url}')
        film = CompilationPageParser(idfa_data, url, film).feed(compilation_data)
        if film is not None:
            # BUG FIX: the original referenced 'filmid' here unconditionally,
            # raising NameError whenever the url was not in film_id_by_url.
            # Only cache the HTML when the film id is actually known.
            if filmid is not None:
                film_file = film_file_format.format(filmid)
                if not os.path.isfile(film_file):
                    print(f'Writing HTML data of: {film.title}')
                    with open(film_file, 'w') as f:
                        f.write(compilation_data)
            print(f'Parsing SCREENINGS of combination program: {film.title}')
            ScreeningsParser(idfa_data, film).feed(compilation_data)
        else:
            # Typo fixed in the logged message: COPMBINATION -> COMBINATION.
            Globals.error_collector.add(
                'Parsing of COMBINATION PROGRAM site failed', url)
    return film
def get_screenings_of_one_film(self, film_html_file, nff_data):
    """Parse the cached premiere page of self.film, when the file exists.

    Does nothing when film_html_file is absent.  The file is decoded with its
    detected charset and fed to a PremierePageParser.
    """
    title = self.film.title
    if not os.path.isfile(film_html_file):
        return
    self.print_debug("-- Analysing premiêre page of title:", title)
    parser = PremierePageParser(self.film, nff_data)
    encoding = web_tools.get_charset(film_html_file)
    with open(film_html_file, 'r', encoding=encoding) as handle:
        # NOTE(review): joining lines (which keep their '\n') with '\n'
        # doubles the newlines — presumably what the parser expects; confirm.
        page_text = '\n' + '\n'.join(handle)
    parser.feed(page_text)
def get_film_details(imagine_data):
    """For every film, obtain its page HTML (cache first, else download) and parse it.

    load_url is given the cache path, so a successful download is presumably
    also written to disk by web_tools (verify against that module).
    """
    for film in imagine_data.films:
        cache_path = film_file_format.format(film.filmid)
        if os.path.isfile(cache_path):
            encoding = web_tools.get_charset(cache_path)
            with open(cache_path, 'r', encoding=encoding) as handle:
                html_data = handle.read()
        else:
            print(f"Downloading site of {film.title}: {film.url}")
            reader = web_tools.UrlReader(Globals.error_collector)
            html_data = reader.load_url(film.url, cache_path)
        # A failed download yields None; skip parsing in that case.
        if html_data is not None:
            print(f"Analysing html file {film.filmid} of {film.title} {film.url}")
            FilmPageParser(imagine_data, film).feed(html_data)
def parse_film_pages(self, nff_data):
    """Read each film's cached page file and parse it.

    Missing files (FileNotFoundError from charset detection, open, or the
    per-film parse) are reported to the global error collector instead of
    aborting the loop.
    """
    for film in nff_data.films:
        page_path = film_file_format.format(film.filmid)
        print(f"Now reading {page_path} - {film.title} ({film.duration_str()})")
        try:
            encoding = web_tools.get_charset(page_path)
            with open(page_path, 'r', encoding=encoding) as handle:
                page_text = handle.read()
            self.parse_one_film_page(film, page_text, nff_data)
        except FileNotFoundError as e:
            Globals.error_collector.add(
                e, "while parsing film pages in ScreeningsLoader")
def get_films(self, data):
    """Collect films from every A-Z listing page (cache first, else download).

    Iterates pages 1..self.page_count; each page's HTML is read from its cache
    file when present, otherwise downloaded (load_url also receives the cache
    path), then handed to an AzPageParser.

    Args:
        data: festival data object the AzPageParser fills in.
    """
    for page_number in range(1, self.page_count + 1):
        az_data = None
        az_file = az_file_format.format(page_number)
        if os.path.isfile(az_file):
            charset = web_tools.get_charset(az_file)
            with open(az_file, 'r', encoding=charset) as f:
                az_data = f.read()
        else:
            az_page = az_webroot_format.format(page_number)
            print(f"Downloading {az_page}.")
            url_reader = web_tools.UrlReader(Globals.error_collector)
            az_data = url_reader.load_url(az_page, az_file)
        # BUG FIX: the sibling loaders all guard against a failed download
        # before parsing; without this check a load_url failure (None) would
        # crash the parser instead of being skipped.
        if az_data is not None:
            parser = AzPageParser(data)
            parser.feed(az_data)
def get_details_of_one_film(self, idfa_data, film):
    """Obtain one film's page HTML (cache first, else download) and parse it.

    Parses the film info and then its screenings from the same HTML; does
    nothing further when the page could not be obtained.
    """
    cache_file = film_file_format.format(film.filmid)
    if os.path.isfile(cache_file):
        encoding = web_tools.get_charset(cache_file)
        with open(cache_file, 'r', encoding=encoding) as handle:
            film_data = handle.read()
    else:
        print(f"Downloading site of {film.title}: {film.url}")
        film_data = web_tools.UrlReader(Globals.error_collector).load_url(
            film.url, cache_file)
    if film_data is not None:
        print(f'Parsing FILM INFO of: {film.title}')
        FilmPageParser(idfa_data, film).feed(film_data)
        print(f'Parsing SCREENINGS of: {film.title}')
        ScreeningsParser(idfa_data, film).feed(film_data)