示例#1
0
def _get_functions_from_zip_archive(
        archive_path: str,
        github_repo: models.GithubRepo) -> tp.Set[parser.Function]:
    """Unpack a zip archive into a scratch directory and parse its contents.

    The archive at ``archive_path`` is extracted with ``_unzip_file`` into a
    temporary directory, and that directory is then passed, together with
    ``github_repo``, to ``_get_functions_from_directory``. The temporary
    directory only exists for the duration of this call.

    Returns:
        Whatever ``_get_functions_from_directory`` returns for the extracted
        tree.
    """
    unzip_label = f'unzipping {archive_path}'
    with tempfile.TemporaryDirectory() as extraction_dir:
        # Only the extraction step runs inside the log_time context.
        with utils.log_time(loggers.loader, unzip_label):
            _unzip_file(archive_path, extraction_dir)
        # Parse while the extracted files still exist on disk.
        extracted_functions = _get_functions_from_directory(
            extraction_dir, github_repo)
    return extracted_functions
示例#2
0
def _get_functions_from_github_repo(
        github_repo: models.GithubRepo) -> tp.Set[parser.Function]:
    """Download a repository's zipball and return the functions found in it.

    The archive referenced by ``github_repo.zipball_url`` is fetched with
    ``_urlretrieve`` into a temporary location and then handed to
    ``_get_functions_from_zip_archive``. The downloaded archive is removed
    when the function returns.

    Returns:
        Whatever ``_get_functions_from_zip_archive`` returns for the
        downloaded archive.
    """
    import os  # function-scope import keeps this block self-contained

    # Download to a path inside a temporary directory instead of into an
    # open NamedTemporaryFile: a named temporary file that is still held
    # open cannot be opened a second time by name on Windows, so writing
    # the download to it would fail there.
    # NOTE(review): assumes _urlretrieve creates the destination file itself
    # (as urllib.request.urlretrieve does) - confirm against its definition.
    with tempfile.TemporaryDirectory() as download_dir:
        archive_path = os.path.join(download_dir, 'repo.zip')
        with utils.log_time(loggers.loader,
                            f'fetching {github_repo.zipball_url}'):
            _urlretrieve(github_repo.zipball_url, archive_path)
        # Parse before leaving the context so the archive still exists.
        return _get_functions_from_zip_archive(archive_path, github_repo)
示例#3
0
def _find_github_repos(github_client, full_names) -> ta.GithubRepos:
    """Fetch and parse every repository named in ``full_names``.

    Each name is looked up with ``github_client.get_repo(..., lazy=False)``
    and the result is converted with ``_parse_repository``. The whole batch
    runs inside a single ``utils.log_time`` context.

    Returns:
        A list of parsed repositories, in the same order as ``full_names``.
    """
    with utils.log_time(loggers.loader,
                        f'loading {len(full_names)} github repos'):
        return [
            _parse_repository(github_client.get_repo(repo_name, lazy=False))
            for repo_name in full_names
        ]
示例#4
0
def _select_functions_to_add(
        functions: tp.Set[parser.Function]) -> ta.ParserFunctions:
    """Narrow ``functions`` down to the subset that should be added.

    Selection happens in two stages, both inside one ``utils.log_time``
    context: ``select_good_functions`` filters the input, then
    ``_select_random_functions`` picks from the filtered result. The size
    of each stage's output is logged relative to its input.

    Returns:
        The result of ``_select_random_functions``.
    """
    log = loggers.loader
    timer_label = f'select good functions from {len(functions)} functions'
    with utils.log_time(log, timer_label):
        # Stage 1: filter the candidates.
        candidates = select_good_functions(functions)
        log.info('selected %d/%d good functions',
                 len(candidates), len(functions))

        # Stage 2: pick from the filtered candidates.
        chosen = _select_random_functions(candidates)
        log.info('selected %d/%d random functions',
                 len(chosen), len(candidates))
    return chosen
示例#5
0
def _get_functions_from_directory(
        dir_path: str,
        github_repo: models.GithubRepo) -> tp.Set[parser.Function]:
    """Parse the ``py`` files found under ``dir_path`` into a set of functions.

    Files are located with ``_find_files(dir_path, extension='py')`` and each
    one is passed to ``parser.get_functions`` using options built with
    ``catch_exceptions=True`` and ``verbose=False``. The per-file results are
    merged into one set, so duplicates collapse. ``github_repo`` is used only
    for the log label.

    NOTE(review): ``len()`` is taken of the ``_find_files`` result, so it is
    presumably a sized collection rather than a generator - confirm.

    Returns:
        The set of distinct functions collected from all files.
    """
    options = parser.Options(catch_exceptions=True, verbose=False)
    with utils.log_time(loggers.loader, f'parsing {github_repo.url}'):
        source_files = _find_files(dir_path, extension='py')
        # Merge the per-file results; set union drops repeated functions.
        distinct_functions = set().union(
            *(parser.get_functions(source_file, options)
              for source_file in source_files))
    loggers.loader.info('found %d distinct functions in %d files',
                        len(distinct_functions), len(source_files))
    return distinct_functions