Пример #1
0
def main():
    """Parse the command line and run the requested job.

    An ``ICheckMoviesMisconfigurationException`` raised by ``execute`` is
    reported through ``command_line.error`` and the process then exits
    with status 1.
    """
    cli_args = parse_args()
    try:
        execute(cli_args)
    except ICheckMoviesMisconfigurationException as misconfiguration:
        message = str(misconfiguration)
        command_line.error(message)
        sys.exit(1)
Пример #2
0
def execute(args):
    """Run parsing followed by inserting for the source named in ``args``.

    The parser class is looked up from ``args.source`` and instantiated with
    ``args``. Any ``RatSException`` raised by the lookup, the parsing or the
    inserting step is reported via ``command_line.error`` and swallowed.
    """
    try:
        parser_factory = get_parser_from_arg(args.source)
        site_parser = parser_factory(args)
        parsed_movies = execute_parsing(args, site_parser)
        execute_inserting(args, parsed_movies, site_parser)
    except RatSException as rats_error:
        command_line.error(str(rats_error))
Пример #3
0
 def _handle_captcha_challenge_if_present(self):
     """Abort the run when the page contains a captcha image container.

     Does nothing when no matching element is found. Otherwise reports the
     failed login, prints a hint, calls ``browser_handler.kill()`` and exits
     with status 1.
     """
     captcha_containers = self.browser.find_elements_by_xpath("//div[@id='auth-captcha-image-container']")
     if len(captcha_containers) == 0:
         return

     failure_message = "Login to {site_name} failed.".format(site_name=self.site_name)
     command_line.error(failure_message)
     sys.stdout.write("There seems to be a Captcha challenge present for the login. Please try again later.\r\n")
     sys.stdout.flush()
     self.browser_handler.kill()
     sys.exit(1)
Пример #4
0
def main():
    """Parse the command line and run the requested job.

    A ``RatSException`` escaping ``execute`` is reported through
    ``command_line.error`` and the process then exits with status 1.
    """
    cli_args = parse_args()
    try:
        execute(cli_args)
    except RatSException as rats_error:
        message = str(rats_error)
        command_line.error(message)
        sys.exit(1)
Пример #5
0
 def _handle_login_unsuccessful(self):
     """Abort the run if the user is still not logged in after one second.

     Sleeps for a second, then asks ``_user_is_not_logged_in``. When that
     reports a failed login, the error and a credentials hint are printed,
     ``browser_handler.kill()`` is called and the process exits with status 1.
     """
     time.sleep(1)
     if not self._user_is_not_logged_in():
         return

     failure_message = "Login to {site_name} failed.".format(site_name=self.site_name)
     command_line.error(failure_message)
     sys.stdout.write("Please check if the credentials are correctly set in your credentials.cfg\r\n")
     sys.stdout.flush()
     self.browser_handler.kill()
     sys.exit(1)
Пример #6
0
 def _check_login_successful(self):
     """Abort the run when the login form is still fully present.

     The login counts as failed only if the login button, the username field
     and the password field are all found on the current page. The lookups
     run in that order and stop at the first one that finds nothing.
     """
     def _is_on_page(selector):
         # True when at least one element matches the given XPath selector.
         return len(self.browser.find_elements_by_xpath(selector)) > 0

     if not _is_on_page(self.LOGIN_BUTTON_SELECTOR):
         return
     if not _is_on_page(self.LOGIN_USERNAME_SELECTOR):
         return
     if not _is_on_page(self.LOGIN_PASSWORD_SELECTOR):
         return

     command_line.error("Login to %s failed." % self.site_name)
     sys.stdout.write("Please check if the credentials are correctly set in your credentials.cfg\r\n")
     sys.stdout.flush()
     self.kill_browser()
     sys.exit(1)
Пример #7
0
def execute_inserting(args, movies, parser):
    """Insert ``movies`` into every destination listed in ``args.destination``.

    Nothing happens when no destination was given. With at least one
    destination but an empty ``movies`` collection, an error is reported and
    the process exits with status 1.
    """
    if not args.destination:
        return

    if len(movies) == 0:
        command_line.error("There are no files to be inserted. Did the Parser run properly?")
        sys.exit(1)

    # INSERT THE DATA
    for destination_name in args.destination:
        inserter_class = get_inserter_from_arg(destination_name)
        site_inserter = inserter_class(args)
        insert_movie_ratings(site_inserter, movies, type(parser.site).__name__)
Пример #8
0
def get_inserter_from_arg(param):
    """Return the ``INSERTERS`` entry registered under ``param`` upper-cased.

    When no such key exists, an error and the list of available inserter
    keys are printed and the process exits with status 1.
    """
    inserter_key = param.upper()
    try:
        inserter_class = INSERTERS[inserter_key]
    except KeyError:
        command_line.error(f"No inserter matching '{param}' found.")
        sys.stdout.write("Available inserters:\r\n")
        for inserter in INSERTERS:
            sys.stdout.write(f" - {inserter} \n")
        sys.stdout.flush()
        sys.exit(1)
    else:
        return inserter_class
Пример #9
0
def get_parser_from_arg(param):
    """Return the ``PARSERS`` entry registered under ``param`` upper-cased.

    When no such key exists, an error and the list of available parser keys
    are printed and the process exits with status 1.
    """
    parser_key = param.upper()
    try:
        parser_class = PARSERS[parser_key]
    except KeyError:
        not_found_message = "No parser matching '{entered_parser}' found.".format(entered_parser=param)
        command_line.error(not_found_message)
        sys.stdout.write("Available parsers:\r\n")
        for available_parser in PARSERS:
            sys.stdout.write(' - {parser} \n'.format(parser=available_parser))
        sys.stdout.flush()
        sys.exit(1)
    else:
        return parser_class
Пример #10
0
def execute_inserting(args, movies, parser):
    """Insert ``movies`` into the requested destination sites.

    The destinations are all ``INSERTERS`` keys when ``args.all_destinations``
    is set, otherwise the upper-cased entries of ``args.destination``. The
    list is then handed to ``_filter_source_site_from_destinations`` together
    with the source site's name (presumably removing the source in place).
    With destinations left but no movies, an error is reported and the
    process exits with status 1.
    """
    if args.all_destinations:
        destinations = list(INSERTERS.keys())
    else:
        destinations = [destination.upper() for destination in args.destination]
    _filter_source_site_from_destinations(destinations, parser.site.site_name)

    if not destinations:
        return

    if len(movies) == 0:
        command_line.error(
            "There are no files to be inserted. Did the parser run properly?"
        )
        sys.exit(1)

    # INSERT THE DATA
    for destination_name in destinations:
        inserter_class = get_inserter_from_arg(destination_name)
        site_inserter = inserter_class(args)
        insert_movie_ratings(site_inserter, movies, type(parser.site).__name__)
Пример #11
0
def execute_parsing(args, parser):
    """Return the movies for this run, either loaded from a file or parsed.

    Exits with status 1 when the parser's site has no valid credentials.
    With ``args.file`` set, the movies come from ``load_data_from_file`` and
    the site's browser handler is killed afterwards; otherwise they come
    from ``parse_data_from_source``.
    """
    if not parser.site.CREDENTIALS_VALID:
        missing_credentials = "No valid credentials found for {site_name}. Skipping parsing.".format(
            site_name=parser.site.site_name)
        command_line.error(missing_credentials)
        sys.exit(1)

    if not args.file:
        # PARSE DATA
        return parse_data_from_source(parser)

    # LOAD FROM FILE
    loaded_movies = load_data_from_file(args.file)
    parser.site.browser_handler.kill()
    return loaded_movies
Пример #12
0
 def _retrieve_pages_count_and_movies_count(self, movie_ratings_page):
     """Store the number of rated movies and return the number of pages.

     The session JSON is fetched by running a synchronous XMLHttpRequest in
     the browser. If it has no ``movie_ratings`` key, an error is reported,
     the browser handler is killed and the process exits with status 1.
     Otherwise ``self.movies_count`` is set from that key and the page count
     is computed assuming 100 movies per page, rounded up.

     ``movie_ratings_page`` is not used by this method.
     """
     session_request_script = """
         var xmlHttp = new XMLHttpRequest();
         xmlHttp.open( "GET", "https://www.moviepilot.de/api/session", false );
         xmlHttp.send( null );
         return xmlHttp.responseText;
     """
     raw_session = self.site.browser.execute_script(session_request_script)
     session_data = json.loads(raw_session)

     if 'movie_ratings' not in session_data:
         command_line.error('Could not establish a session. '
                            'Please try again with the -x option if the problem persists.')
         self.site.browser_handler.kill()
         sys.exit(1)

     self.movies_count = session_data['movie_ratings']
     return math.ceil(self.movies_count / 100)
Пример #13
0
def insert_movie_ratings(inserter, movies, source):
    """Run ``inserter.insert(movies, source)`` if its site has valid credentials.

    Without valid credentials only a warning is emitted. Any exception raised
    by the insertion is not propagated: stdout is flushed, the site's browser
    handler is killed, an error is reported and the traceback is printed.
    """
    if not inserter.site.CREDENTIALS_VALID:
        skip_notice = "No valid credentials found for {site_name}. Skipping insertion.".format(
            site_name=inserter.site.site_name)
        command_line.warn(skip_notice)
        return

    try:
        inserter.insert(movies, source)
    except Exception:  # pylint: disable=broad-except
        # exception should be logged in a file --> issue #15
        sys.stdout.flush()
        inserter.site.browser_handler.kill()
        failure_notice = "There was an exception inside {site_name} (see below). Skipping insertion.".format(
            site_name=inserter.site.site_name)
        command_line.error(failure_notice)
        traceback.print_exc()
Пример #14
0
def parse_data_from_source(parser):
    """Parse the movies from the parser's site and save them as JSON.

    A ``RatSException`` raised while parsing is reported and ends the process
    with status 1. On success the movies are written to a timestamped JSON
    file inside ``EXPORTS_FOLDER``, a summary line is printed, and the movies
    are returned.
    """
    try:
        movies = parser.parse()
    except RatSException as parsing_error:
        command_line.error(str(parsing_error))
        sys.exit(1)

    json_filename = f"{TIMESTAMP}_{type(parser.site).__name__}.json"
    file_impex.save_movies_to_json(movies, folder=EXPORTS_FOLDER, filename=json_filename)

    summary_line = (
        f"\r\n===== {parser.site.site_displayname}: saved {len(movies)} parsed movies to "
        f"{EXPORTS_FOLDER}/{json_filename}\r\n"
    )
    sys.stdout.write(summary_line)
    sys.stdout.flush()
    return movies
Пример #15
0
    def _parse_ratings(self):
        """Download the ratings archive, extract its CSV and parse the movies.

        The exports folder is listed before and after the download and both
        listings are passed to ``_get_downloaded_filename``. Only when that
        yields exactly one entry is the archive extracted, the CSV renamed and
        ``self.movies`` filled; otherwise an error is reported.
        """
        self.before = os.listdir(self.exports_folder)  # pylint: disable=attribute-defined-outside-init
        self._download_ratings_csv()

        folder_after_download = os.listdir(self.exports_folder)
        new_files = self._get_downloaded_filename(folder_after_download, self.before)
        if len(new_files) != 1:
            command_line.error('Could not determine file location')
            return

        archive_filename = new_files.pop()  # the one file that was added to the dir
        archive_path = os.path.join(self.exports_folder, archive_filename)
        file_impex.extract_file_from_archive(
            archive_path, self.downloaded_file_name, self.exports_folder)
        self._rename_csv_file(self.downloaded_file_name)

        csv_path = os.path.join(self.exports_folder, self.csv_filename)
        self.movies = self._parse_movies_from_csv(csv_path)
Пример #16
0
    def _parse_ratings(self):
        """Download the ratings archive, extract ``ratings.csv`` and parse it.

        The exports folder is listed before and after the download and both
        listings are passed to ``_get_downloaded_filename``. Only when that
        yields exactly one entry is the archive extracted, the CSV renamed and
        ``self.movies`` filled; otherwise an error is reported.
        """
        folder_before_download = os.listdir(self.exports_folder)
        self._download_ratings_csv()
        folder_after_download = os.listdir(self.exports_folder)

        new_files = self._get_downloaded_filename(folder_after_download, folder_before_download)
        if len(new_files) != 1:
            command_line.error('Could not determine file location')
            return

        archive_filename = new_files.pop()  # the one file that was added to the dir
        archive_path = os.path.join(self.exports_folder, archive_filename)
        ratings_csv_filename = 'ratings.csv'
        file_impex.extract_file_from_archive(archive_path, ratings_csv_filename, self.exports_folder)
        self._rename_csv_file(ratings_csv_filename)

        csv_path = os.path.join(self.exports_folder, self.csv_filename)
        self.movies = self._parse_movies_from_csv(csv_path)
Пример #17
0
def execute_inserting(args, movies, parser):
    """Insert ``movies`` into the requested destination sites.

    Returns immediately when neither ``args.all_destinations`` nor
    ``args.destination`` is set. Otherwise the destinations are all
    ``INSERTERS`` keys (``all_destinations``) or the upper-cased entries of
    ``args.destination``; the list is then handed to
    ``_filter_source_site_from_destinations`` together with the source
    site's name (presumably removing the source in place).

    A ``RatSException`` raised while creating or running a single inserter
    is reported via ``command_line.error`` and does not stop the remaining
    destinations.

    Raises:
        NoMoviesForInsertion: destinations remain but ``movies`` is empty.
    """
    if not args.all_destinations and not args.destination:
        return

    destinations = list(INSERTERS.keys()) if args.all_destinations \
        else [destination.upper() for destination in args.destination]
    _filter_source_site_from_destinations(destinations, parser.site.site_name)

    if not destinations:
        return

    if len(movies) == 0:
        # The exception used to be constructed but never raised, so the run
        # silently carried on and tried to insert nothing.
        # NOTE(review): assumes NoMoviesForInsertion is an exception class
        # (presumably a RatSException subclass) -- confirm.
        raise NoMoviesForInsertion(
            "There are no files to be inserted. Did the parser run properly?"
        )

    # INSERT THE DATA
    for destination in destinations:
        try:
            inserter = get_inserter_from_arg(destination)(args)
            insert_movie_ratings(inserter, movies,
                                 type(parser.site).__name__)
        except RatSException as e:
            command_line.error(str(e))
Пример #18
0
    def _download_ratings_csv(self):
        """Call the download URL repeatedly until the CSV file was downloaded.

        Sets the browser's page load timeout to 10 and waits a second before
        the first attempt. A ``TimeoutException`` during one of the first ten
        attempts leads to a sleep of as many seconds as the attempt number
        and a retry; from the eleventh attempt on it is re-raised. If the
        eleventh or a later attempt finishes without a timeout while the
        loop is still running, an error is reported and the process exits
        with status 1.
        """
        progress_line = '\r===== {site_displayname}: Retrieving ratings CSV file'.format(
            site_displayname=self.site.site_displayname)
        sys.stdout.write(progress_line)
        sys.stdout.flush()
        self.site.browser.set_page_load_timeout(10)
        time.sleep(1)

        attempt = 0
        while not self._file_was_downloaded():
            attempt += 1
            try:
                self._call_download_url()
            except TimeoutException as timeout_error:
                if attempt > 10:
                    raise timeout_error
                # back off a little longer after every timed-out attempt
                time.sleep(attempt * 1)
            else:
                if attempt > 10:
                    command_line.error("The CSV file containing the movies data could not be downloaded.")
                    sys.exit(1)
Пример #19
0
def parse_data_from_source(parser):
    """Parse the movies from the parser's site and save them as JSON.

    A ``RatSException`` raised while parsing is reported and ends the process
    with status 1. On success the movies are written to a timestamped JSON
    file inside ``EXPORTS_FOLDER``, a summary line is printed, and the movies
    are returned.
    """
    try:
        movies = parser.parse()
    except RatSException as parsing_error:
        command_line.error(str(parsing_error))
        sys.exit(1)

    filename_template = '{timestamp}_{sitename}.json'
    json_filename = filename_template.format(
        timestamp=TIMESTAMP, sitename=type(parser.site).__name__)
    file_impex.save_movies_to_json(movies, folder=EXPORTS_FOLDER, filename=json_filename)

    summary_template = (
        '\r\n===== {site_displayname}: saved {parsed_movies_count} parsed movies to '
        '{folder}/{filename}\r\n'
    )
    summary_line = summary_template.format(
        site_displayname=parser.site.site_displayname,
        parsed_movies_count=len(movies),
        folder=EXPORTS_FOLDER,
        filename=json_filename,
    )
    sys.stdout.write(summary_line)
    sys.stdout.flush()
    return movies
Пример #20
0
 def handle_request_blocked_by_website(self):
     """Report a blocked request and kill the browser handler.

     The request counts as blocked when the current page source contains the
     text ``stackpath``. Nothing happens otherwise.
     """
     request_was_blocked = 'stackpath' in self.browser.page_source
     if not request_was_blocked:
         return
     command_line.error("The request was blocked by the website.")
     self.browser_handler.kill()
Пример #21
0
 def post_upload_action(self):
     """Report a failed CSV upload and kill the site's browser handler.

     The upload counts as failed when the current page source contains the
     site's import-problem message. Nothing happens otherwise.
     """
     import_failed = 'Sorry, a problem occurred while importing your ratings.' in self.site.browser.page_source
     if not import_failed:
         return
     command_line.error("Couldn't upload CSV.")
     self.site.browser_handler.kill()