def _get_functions_from_zip_archive(
        archive_path: str,
        github_repo: models.GithubRepo) -> tp.Set[parser.Function]:
    """Extract a zip archive to a scratch directory and parse its contents.

    The archive at ``archive_path`` is passed to ``_unzip_file`` together
    with a freshly created temporary directory; that directory is then
    handed to ``_get_functions_from_directory`` along with ``github_repo``.
    The temporary directory is removed when this function finishes.

    Args:
        archive_path: Path of the archive passed to ``_unzip_file``.
        github_repo: Repository the archive belongs to; forwarded unchanged
            to ``_get_functions_from_directory``.

    Returns:
        Whatever ``_get_functions_from_directory`` returns for the
        extracted directory.
    """
    unzip_msg = f'unzipping {archive_path}'
    with tempfile.TemporaryDirectory() as extract_dir:
        # Only the extraction step is timed under the "unzipping" message.
        with utils.log_time(loggers.loader, unzip_msg):
            _unzip_file(archive_path, extract_dir)
        # Parsing must complete before the scratch directory is deleted.
        found = _get_functions_from_directory(extract_dir, github_repo)
    return found
def _get_functions_from_github_repo(
        github_repo: models.GithubRepo) -> tp.Set[parser.Function]:
    """Fetch a repository's zipball and return the functions found in it.

    ``github_repo.zipball_url`` is passed to ``_urlretrieve`` together with
    a temporary file path; that path is then handed to
    ``_get_functions_from_zip_archive``.

    The download target lives inside a ``tempfile.TemporaryDirectory``
    rather than being an open ``tempfile.NamedTemporaryFile``: a named
    temporary file that is still open cannot be opened a second time by
    name on Windows, which is what the download and unzip helpers would
    need to do. The directory (and the archive in it) is removed when this
    function finishes, so nothing is left behind.

    Args:
        github_repo: Repository to fetch; its ``zipball_url`` attribute is
            read, and the object is forwarded to
            ``_get_functions_from_zip_archive``.

    Returns:
        Whatever ``_get_functions_from_zip_archive`` returns for the
        fetched archive.
    """
    import os  # local import: keeps this block self-contained

    with tempfile.TemporaryDirectory() as download_dir:
        archive_path = os.path.join(download_dir, 'repo.zip')
        # Only the fetch step is timed under the "fetching" message.
        with utils.log_time(loggers.loader,
                            f'fetching {github_repo.zipball_url}'):
            _urlretrieve(github_repo.zipball_url, archive_path)
        # Parse while the downloaded archive still exists on disk.
        return _get_functions_from_zip_archive(archive_path, github_repo)
def _find_github_repos(github_client, full_names) -> ta.GithubRepos:
    """Look up each repository by full name and convert it.

    For every entry of ``full_names`` this calls
    ``github_client.get_repo(name, lazy=False)`` and passes the result
    through ``_parse_repository``. The whole loop is timed with
    ``utils.log_time``.

    Args:
        github_client: Object exposing ``get_repo(full_name, lazy=...)``.
        full_names: Sized iterable of repository names (``len()`` is taken
            for the log message).

    Returns:
        A list with one ``_parse_repository`` result per name, in input
        order.
    """
    parsed_repos = []
    with utils.log_time(loggers.loader,
                        f'loading {len(full_names)} github repos'):
        # extend() consumes the generator item by item, so repositories
        # are fetched and parsed one at a time in input order.
        parsed_repos.extend(
            _parse_repository(github_client.get_repo(name, lazy=False))
            for name in full_names
        )
    return parsed_repos
def _select_functions_to_add(
        functions: tp.Set[parser.Function]) -> ta.ParserFunctions:
    """Filter ``functions`` in two stages and log how many survive each.

    First ``select_good_functions`` is applied (timed with
    ``utils.log_time``), then ``_select_random_functions`` is applied to
    its result. After each stage the kept/input counts are logged.

    Args:
        functions: Candidate functions; must support ``len()``.

    Returns:
        The result of ``_select_random_functions`` on the good functions.
    """
    total_count = len(functions)
    timing_msg = f'select good functions from {total_count} functions'
    with utils.log_time(loggers.loader, timing_msg):
        good = select_good_functions(functions)
    good_count = len(good)
    loggers.loader.info(
        'selected %d/%d good functions', good_count, total_count)

    chosen = _select_random_functions(good)
    loggers.loader.info(
        'selected %d/%d random functions', len(chosen), good_count)
    return chosen
def _get_functions_from_directory(
        dir_path: str,
        github_repo: models.GithubRepo) -> tp.Set[parser.Function]:
    """Collect the distinct functions from the ``py`` files under a directory.

    Files are obtained from ``_find_files(dir_path, extension='py')`` and
    each is passed to ``parser.get_functions`` with options
    ``catch_exceptions=True, verbose=False``. Results are merged into one
    set, so duplicates collapse. File discovery and parsing are timed
    together with ``utils.log_time``.

    Args:
        dir_path: Directory passed to ``_find_files``.
        github_repo: Only its ``url`` attribute is read, for the log
            message.

    Returns:
        The set of all functions returned by the parser across the files.
    """
    options = parser.Options(catch_exceptions=True, verbose=False)
    distinct_functions = set()
    with utils.log_time(loggers.loader, f'parsing {github_repo.url}'):
        source_files = _find_files(dir_path, extension='py')
        # Lazily flatten per-file results into the set, one file at a time.
        distinct_functions.update(
            function
            for source_file in source_files
            for function in parser.get_functions(source_file, options)
        )
    loggers.loader.info('found %d distinct functions in %d files',
                        len(distinct_functions), len(source_files))
    return distinct_functions