def test_clone_and_del_repo(git_path):
    """
    Test cloning and deleting a repository.

    Args:
        git_path: url of the git repository to clone (pytest fixture).
    """
    # Remove a previous clone if it exists so the clone below starts fresh
    if os.path.exists(os.path.basename(git_path)):
        delete_repo(os.path.basename(git_path))

    # Clone and verify the local checkout was created with the expected name
    repo_path = clone_repo(git_path)
    assert os.path.exists(repo_path)
    assert os.path.basename(repo_path).startswith(os.path.basename(git_path))

    # Delete should have return code of 0 (success); plain assert for
    # consistency with the other assertions in this test
    assert delete_repo(repo_path) == 0
def test_check_run_save(tmp_path, retry_count):
    """
    Clone a test repository, run the url checker over it, and verify that
    results can be saved as csv with both relative and full paths.

    Args:
        tmp_path    : pytest fixture providing a temporary directory.
        retry_count : number of retries to pass through to the checker.
    """
    # init vars
    git_path = "https://github.com/urlstechie/urlchecker-test-repo"
    file_types = [".py", ".md"]
    print_all = True
    white_listed_urls = [
        "https://superkogito.github.io/figures/fig2.html",
        "https://superkogito.github.io/figures/fig4.html",
    ]
    white_listed_patterns = ["https://superkogito.github.io/tables"]
    timeout = 1

    # clone repo
    base_path = clone_repo(git_path)

    # get all file paths in subfolder specified
    base_path = os.path.join(base_path, "test_files")
    file_paths = get_file_paths(base_path, file_types)

    # check repo urls (return value not needed here; results are read from
    # the saved csv below)
    checker = UrlChecker(print_all=print_all)
    checker.run(
        file_paths=file_paths,
        white_listed_urls=white_listed_urls,
        white_listed_patterns=white_listed_patterns,
        retry_count=retry_count,
        timeout=timeout,
    )

    # Test saving to file
    output_file = os.path.join(str(tmp_path), "results.csv")
    assert not os.path.exists(output_file)
    saved_file = checker.save_results(output_file)
    assert os.path.exists(output_file)

    # Read in output file
    with open(saved_file, "r") as filey:
        lines = filey.readlines()

    # Header line has three items
    assert lines[0] == "URL,RESULT,FILENAME\n"

    # Remember the relative-path root component so we can verify the
    # full-path save differs below. Guarded so an empty result set does not
    # leave `root` undefined (previously a NameError in that case).
    root = lines[1].split(",")[2].split("/")[0] if len(lines) > 1 else None

    # Ensure content looks okay
    for line in lines[1:]:
        url, result, filename = line.split(",")
        assert url.startswith("http")
        assert result in ["passed", "failed"]
        assert re.search("(.py|.md)$", filename)

    # Save with full path
    saved_file = checker.save_results(output_file, relative_paths=False)

    # Read in output file
    with open(saved_file, "r") as filey:
        lines = filey.readlines()

    # Full paths should no longer start with the relative root component
    for line in lines[1:]:
        url, result, filename = line.split(",")
        assert not filename.startswith(root)
def main(args, extra):
    """
    Main entrypoint for running a check. We expect an args object with
    arguments from the main client. From here we determine the path to parse
    (or GitHub url to clone) and call the main check function under
    main/check.py

    Args:
      - args  : the argparse ArgParser with parsed args
      - extra : extra arguments not handled by the parser
    """
    path = args.path

    # Case 1: specify present working directory
    if not path or path == ".":
        path = os.getcwd()
        logging.debug("Path specified as present working directory, %s" % path)

    # Case 2: We need to clone
    elif re.search("^(git@|http)", path):
        logging.debug("Repository url %s detected, attempting clone" % path)
        path = clone_repo(path, branch=args.branch)

    # Add subfolder to path
    if args.subfolder:
        path = os.path.join(path, args.subfolder)

    # By the time we get here, a path must exist
    if not os.path.exists(path):
        sys.exit("Error %s does not exist." % path)

    # Parse file types, and excluded urls and files (includes absolute and patterns)
    file_types = args.file_types.split(",")
    exclude_urls = remove_empty(args.exclude_urls.split(","))
    exclude_patterns = remove_empty(args.exclude_patterns.split(","))
    exclude_files = remove_empty(args.exclude_files.split(","))
    files = remove_empty(args.files.split(","))

    # Alert user about settings
    print(" original path: %s" % args.path)
    print(" final path: %s" % path)
    print(" subfolder: %s" % args.subfolder)
    print(" branch: %s" % args.branch)
    print(" cleanup: %s" % args.cleanup)
    print(" file types: %s" % file_types)
    print(" files: %s" % files)
    print(" print all: %s" % (not args.no_print))
    print(" verbose: %s" % (args.verbose))
    print(" urls excluded: %s" % exclude_urls)
    print(" url patterns excluded: %s" % exclude_patterns)
    print(" file patterns excluded: %s" % exclude_files)
    print(" force pass: %s" % args.force_pass)
    print(" retry count: %s" % args.retry_count)
    print(" save: %s" % args.save)
    print(" timeout: %s" % args.timeout)

    # Instantiate a new checker with provided arguments
    checker = UrlChecker(
        path=path,
        file_types=file_types,
        include_patterns=files,
        exclude_files=exclude_files,
        print_all=not args.no_print,
    )
    check_results = checker.run(
        exclude_urls=exclude_urls,
        exclude_patterns=exclude_patterns,
        retry_count=args.retry_count,
        timeout=args.timeout,
    )

    # save results to file, if save indicated
    if args.save:
        checker.save_results(args.save)

    # delete repo when done, if requested
    if args.cleanup:
        logger.info("Cleaning up %s..." % path)
        delete_repo(path)

    # Case 1: We didn't find any urls to check
    if not check_results["failed"] and not check_results["passed"]:
        print("\n\n\U0001F937. No urls were collected.")
        sys.exit(0)

    # Case 2: We had errors, print them for the user
    if check_results["failed"]:
        if args.verbose:
            print("\n\U0001F914 Uh oh... The following urls did not pass:")
            # NOTE(review): this loop header was garbled in the original
            # source; reconstructed on the assumption that checker.checks maps
            # each file name to its per-file result dict — confirm against the
            # UrlChecker implementation.
            for file_name, result in checker.checks.items():
                if result["failed"]:
                    print_failure(file_name + ":")
                    for url in result["failed"]:
                        print_failure(" " + url)
        else:
            print("\n\U0001F914 Uh oh... The following urls did not pass:")
            # NOTE(review): loop header also reconstructed from a garbled span;
            # iterates the flat list of failed urls.
            for failed_url in check_results["failed"]:
                print_failure(failed_url)

    # If we have failures and it's not a force pass, exit with 1
    if not args.force_pass and check_results["failed"]:
        sys.exit(1)

    # Finally, alert user if we are passing conditionally
    if check_results["failed"]:
        print("\n\U0001F928 Conditional pass force pass True.")
    else:
        print("\n\n\U0001F389 All URLS passed!")
    sys.exit(0)