def analyse(self):
    ending = ''  # sets an ending variable to differentiate the source code files of Python and Java repos
    if self.repository.language == "Python":
        ending = '**/*.py'
    elif self.repository.language == "Java":
        ending = '**/*.java'
    else:
        print("Error in analyse: language of repository matches neither Python nor Java")
        return
    directory_in_str = self.path + self.repository.name
    pathlist = Path(directory_in_str).glob(ending)
    # loops through all folders in a repository to find all source code files and analyses them
    for path in pathlist:
        # because path is object not string
        path_in_str = str(path)
        analysis = pygount.source_analysis(path_in_str,
                                           self.repository.language,
                                           encoding='utf-8')
        self.codelines += analysis.code + analysis.string
        self.emptylines += analysis.empty
        self.commentlines += analysis.documentation
    print("Analysed Repository: ", self.repository.name)
    return
def analyze(repo):
    extensions = LANGUAGES[repo['language']]['extensions']
    r = Repo(repo['owner'],
             repo['name'],
             default_branch=repo['default_branch'],
             language=repo['language'],
             extensions=extensions)
    json_path = f'{r.path}.json'
    if os.path.isfile(json_path):
        repo = read_json(json_path)
        return repo
    repo['code'], repo['documentation'], repo['empty'] = (0, 0, 0)
    with r:
        for f in r.files:
            # Ignore symlinks.
            if not os.path.isfile(f):
                continue
            analysis = pygount.source_analysis(f, repo['language'],
                                               encoding='utf-8')
            if analysis.state == 'analyzed':
                repo['code'] += analysis.code + analysis.string
                repo['documentation'] += analysis.documentation
                repo['empty'] += analysis.empty
    write_json(json_path, repo)
    return repo
def button_analyse_code(self):
    IrModuleAuthor = self.env['ir.module.author']
    IrModuleTypeRule = self.env['ir.module.type.rule']
    rules = IrModuleTypeRule.search([])
    cfg = self.env['ir.config_parameter']
    val = cfg.get_param('module_analysis.exclude_directories', '')
    exclude_directories = [x.strip() for x in val.split(',') if x.strip()]
    val = cfg.get_param('module_analysis.exclude_files', '')
    exclude_files = [x.strip() for x in val.split(',') if x.strip()]
    for module in self:
        _logger.info("Analysing Code for module %s ..." % (module.name))

        # Update Authors, based on manifest key
        authors = []
        if module.author and module.author[0] == '[':
            author_txt_list = safe_eval(module.author)
        else:
            author_txt_list = module.author.split(',')
        author_txt_list = [x.strip() for x in author_txt_list]
        author_txt_list = [x for x in author_txt_list if x]
        for author_txt in author_txt_list:
            authors.append(IrModuleAuthor._get_or_create(author_txt))
        author_ids = [x.id for x in authors]
        module.author_ids = author_ids

        # Update Module Type, based on rules
        module_type_id = rules._get_module_type_id_from_module(module)
        module.module_type_id = module_type_id

        # Get Path of module folder and parse the code
        module_path = get_module_path(module.name)

        # Get Files
        analysed_datas = self._get_analyse_data_dict()
        file_extensions = analysed_datas.keys()
        file_list = self._get_files_to_analyse(module_path, file_extensions,
                                               exclude_directories,
                                               exclude_files)
        for file_path, file_ext in file_list:
            file_res = pygount.source_analysis(
                file_path, '', encoding=self._get_module_encoding(file_ext))
            for k, v in analysed_datas.get(file_ext).items():
                v['value'] += getattr(file_res, k)

        # Update the module with the datas
        values = {}
        for file_ext, analyses in analysed_datas.items():
            for k, v in analyses.items():
                values[v['field']] = v['value']
        module.write(values)
def scan_sloc(self, repo_name, repo_path, repo_owner, skip_path, suffix):
    db_session = self.create_db_session()
    row_repo = self.load_repository(db_session, repo_name)
    if row_repo is None:
        row_repo = Repository(name=repo_name, path=repo_path, owner=repo_owner)
        db_session.add(row_repo)
    gitrepo = GitRepo(repo_path + '/.git')
    existing_revision = [_.hash for _ in row_repo.revisions]
    log.info(f'repo_name: {row_repo.name}')
    log.info(f'repo_path: {row_repo.path}')
    log.info(f'repo_owner: {row_repo.owner}')
    log.info(f'size of existing_revision: {len(existing_revision)}')
    analysis_cache = {}
    for commit in gitrepo.get_all_commit_id():
        date = datetime.datetime.fromtimestamp(int(commit.commit_time)).strftime('%Y-%m-%d %H:%M:%S')
        hash = str(commit.id)
        log.info(f'{date}, {hash}')
        if hash not in existing_revision:
            log.info('add revision')
            existing_revision.append(hash)
            row_revision = Revision(hash=hash,
                                    commit_time=datetime.datetime.fromtimestamp(int(commit.commit_time)))
            row_repo.revisions.append(row_revision)
            gitrepo.checkout_by(hash)
            row_revision.slocs = []
            for f in self.source_scanner(repo_path, skip_path, suffix):
                fcontent = ''
                with open(f, 'rb') as fh:
                    fcontent = fh.read()
                if f in analysis_cache and analysis_cache[f][1] == fcontent:
                    analysis = analysis_cache[f][0]
                    # log.info('Use cache in analysis: %s', f)
                else:
                    analysis = pygount.source_analysis(f, group='pygount', encoding='automatic')
                    analysis_cache[f] = (analysis, fcontent)
                    log.info(f'Analysis: {f}')
                row_revision.slocs.append(Sloc(language=analysis.language,
                                               filename=f,
                                               source_line=analysis.code,
                                               empty_line=analysis.empty))
            db_session.commit()
    log.info('End of scanning.')
def analyze_directory(directory, include, exclude):
    """
    Analyze source code files from directory applying given include and
    exclude patterns.

    :param str directory: Directory to search for files to analyze.
    :param list include: List of include patterns.
    :param list exclude: List of exclude patterns.
    :return:
     A dictionary mapping each analyzed file to its detected language, and a
     dictionary mapping each language detected in the directory to its total
     amount of code, documentation, empty and string lines.
    :rtype: tuple of dict
    """
    from pygount import source_analysis

    # Collect files
    collected = collect_files(directory, include, exclude)

    # Analyze files
    files = OrderedDict()
    sloc = OrderedDict()

    for relfname, absfname in joiner(directory, collected):

        # Perform analysis
        analysis = source_analysis(
            absfname, '', fallback_encoding='utf-8'
        )

        # Get language and register the file as analyzed
        lang = analysis.language.lower()
        files[relfname] = lang

        # Ignore unknown and binary files
        if lang in ['__unknown__', '__binary__']:
            continue

        # Grab counter and perform accumulation
        counter = sloc.setdefault(lang, OrderedDict())
        for count in ['code', 'documentation', 'empty', 'string']:
            counter[count] = counter.get(count, 0) + getattr(analysis, count)

    return files, sloc
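For reference, a hedged usage sketch of the function above. The directory name and patterns are made up for illustration, and the include/exclude semantics are assumed to be whatever collect_files() implements in this project:

files, sloc = analyze_directory('myproject', include=['*'], exclude=['.git'])
for lang, counts in sloc.items():
    # counts holds the accumulated totals per detected language
    print(lang, counts['code'], counts['documentation'],
          counts['empty'], counts['string'])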
def process_file(self, file, path):
    try:
        analysis = pygount.source_analysis(file, str(path), encoding='chardet')
        if analysis is not None and analysis.code > 0 and analysis.language is not None:
            self.analysis[file] = analysis
            if analysis.language in self.lang_files:
                self.lang_files[analysis.language].append(file)
            else:
                self.lang_files[analysis.language] = [file]
            self.process_files += 1
            print(analysis.language,
                  len(self.lang_files[analysis.language]),
                  "processed files:", self.process_files,
                  end=" " * 80 + "\r")
    except Exception:
        self.log.warning("couldn't process file {}".format(file))
def extract_LOC(file):
    analysis = pygount.source_analysis(file, 'pygount')
    try:
        return analysis.code, analysis.documentation
    except:
        return 0, 0
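All of these examples revolve around the same call. As a minimal, self-contained sketch of it (the file path is hypothetical, and the second positional argument is only a grouping label):

import pygount

# Analyse a single file; 'demo' is an arbitrary group label.
analysis = pygount.source_analysis('example.py', 'demo', encoding='utf-8')
print(analysis.language, analysis.code, analysis.documentation,
      analysis.empty, analysis.string)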
def execute(root_dir):
    """
    Returns a dictionary containing the URL of a repository, number of lines,
    external libraries used, average nesting factor, percentage of code
    duplication, average number of parameters and average number of variables
    of the repository.

    Parameter:
        root_dir (path): Path to the local repository.

    Return:
        A dictionary containing the URL of a repository, number of lines,
        external libraries used, average nesting factor, percentage of code
        duplication, average number of parameters and average number of
        variables of the repository.
    """
    # Recursively collect all the .py files in the repository
    lenght = []
    libraries = []
    nesting_factors = []
    param_count = []
    total_var = []
    duplicate_for_the_repo = []
    average_nesting_factor = 0
    average_param = 0
    code_duplication = 0
    avg_var = 0

    k = root_dir.rsplit('-')
    n = k[0]
    m = k[-1]
    urls = [repo for repo in repo_list if n and m in repo]
    if urls:
        url = urls[0]
    else:
        url = root_dir

    for filename in glob.iglob(root_dir + '/**/*.py', recursive=True):
        # filename = filename.replace(" ", "\\ ")
        filename = str_to_raw(filename)
        try:
            # count the lines of code in the .py file
            count = pygount.source_analysis(filename, 'pygount')
            l = count.code
            lenght.append(l)
            library = imported_module(filename)
            for lib in library:
                libraries.append(lib)
            deg_list = nesting_factor(for_loop_position(filename))
            for deg in deg_list:
                nesting_factors.append(deg)
            for param in parameter_count(filename):
                param_count.append(param)
            for var in variable_count(filename):
                total_var.append(var)
            duplicate_for_the_repo.append(duplicated_line(filename))
        except Exception as e:
            print("type error: " + str(e))
            print(filename)

    if len(nesting_factors) != 0:
        average_nesting_factor = np.mean(nesting_factors)
    if param_count:
        average_param = np.mean(param_count)
    libraries = unique(libraries)
    repo_count = sum(lenght)
    if total_var:
        avg_var = np.mean(total_var)
    if repo_count and duplicate_for_the_repo:
        code_duplication = (sum(duplicate_for_the_repo) / repo_count) * 100
    return {
        'repository_url': url,
        'number of lines': repo_count,
        'libraries': libraries,
        'nesting factor': average_nesting_factor,
        'code duplication': code_duplication,
        'average parameters': average_param,
        'average variables': avg_var
    }
bigger_file = None
listdirs = next(os.walk(os.path.join(RESULTS_FOLDER, folder)))[1]
if not listdirs:
    shutil.rmtree(os.path.join(RESULTS_FOLDER, folder))
else:
    for subfolder in listdirs:
        files = next(
            os.walk(os.path.join(RESULTS_FOLDER, folder, subfolder)))[2]
        if not files:
            shutil.rmtree(os.path.join(RESULTS_FOLDER, folder, subfolder))
        else:
            for file in files:
                content = ""
                invalid = False
                n_lines = pygount.source_analysis(
                    os.path.join(RESULTS_FOLDER, folder, subfolder, file),
                    '').code
                with open(
                        os.path.join(RESULTS_FOLDER, folder, subfolder, file),
                        'r') as f:
                    for line in f:
                        if is_invalid_line(line):
                            invalid = True
                            break
                        content += line
                        n_lines += 1
                if invalid:
                    continue
                try:
def main():
    parser = argparse.ArgumentParser(
        description='''Summarize github stats for a user or organization.''')
    parser.add_argument('user', type=str, help='Github user or organization')
    parser.add_argument(
        '--include',
        type=str,
        help='''One or more repos to include, separated by commas.
        Note that --exclude takes priority over --include.
        ''')
    parser.add_argument(
        '--exclude',
        type=str,
        help='''One or more repos to exclude, separated by commas.
        Note that --exclude takes priority over --include.
        ''')
    parser.add_argument('-d',
                        '--debug',
                        default=False,
                        type=bool,
                        help='Show debug messages (default = False)')
    args = parser.parse_args()

    # "user" means user or organization
    user = args.user
    include = [] if args.include is None else args.include.split(',')
    exclude = [] if args.exclude is None else args.exclude.split(',')

    g = None
    environ = os.environ
    github_username = None
    if 'GITHUB_USERNAME' in environ:
        github_username = environ['GITHUB_USERNAME']
    github_pwd = None
    if 'GITHUB_PWD' in environ:
        github_pwd = environ['GITHUB_PWD']
    if (github_username is not None) and (github_pwd is not None):
        g = Github(github_username, github_pwd)
    else:
        g = Github()

    repo_count = 0
    commit_count = 0
    loc_count = 0
    for repo in g.get_user(user).get_repos():
        repo_name = repo.name
        if ((len(exclude) == 0 or repo_name not in exclude)
                and (len(include) == 0 or repo_name in include)):
            repo_count += 1
            print(str(repo_count) + ') analyzing ' + repo_name + '...')
            for commit in repo.get_commits():
                commit_count += 1
            url_vars = {'user': user, 'repo': repo.name}
            url = 'https://github.com/{user}/{repo}/archive/master.zip'.format(
                **url_vars)
            r = requests.get(url, stream=True)
            if r.ok:
                z = zipfile.ZipFile(io.BytesIO(r.content))
                with tempfile.TemporaryDirectory() as tmpdirname:
                    z.extractall(tmpdirname)
                    scanner = pygount.analysis.SourceScanner([tmpdirname])
                    source_paths = list(scanner.source_paths())
                    for source_path in source_paths:
                        analysis = pygount.source_analysis(
                            source_path[0], repo.name)
                        loc_count += analysis[3]

    print('Totals')
    print('************************')
    print('repos: ' + str(repo_count))
    print('commits: ' + str(commit_count))
    print('lines (LOC): ' + str(loc_count))
    return {
        'repos': repo_count,
        'commits': commit_count,
        'lines': loc_count,
    }