class RepositoryAnalyzer:
    """Class to extract code language distribution from a software repository

    :param details: if enabled, it returns fine-grained results
    :param kind: the analyzer kind (e.g., LINGUIST, CLOC); any kind other
        than LINGUIST is served by the Cloc analyzer
    """

    def __init__(self, details=False, kind=LINGUIST):
        self.details = details
        self.kind = kind

        # Linguist is used only when explicitly requested; every other kind
        # falls back to Cloc.
        if kind == LINGUIST:
            self.analyzer = Linguist()
        else:
            self.analyzer = Cloc()

    def analyze(self, repository_path):
        """Analyze the content of a repository using the configured analyzer

        :param repository_path: repository path

        :returns a dict containing the results of the analysis, like the one below
        (for instance, repository is based on Python programming language entirely)
        {
          'Python': 100.0
        }
        """
        kwargs = {
            'repository_path': repository_path,
            'details': self.details
        }

        # Use the same test as the constructor, so that the Cloc-specific
        # arguments are supplied whenever the Cloc analyzer was instantiated
        # (previously they were only added when kind was exactly CLOC).
        if self.kind != LINGUIST:
            kwargs['file_path'] = repository_path
            kwargs['repository_level'] = True

        return self.analyzer.analyze(**kwargs)
def test_analyze_error(self, check_output_mock):
    """Test whether a GraalError is raised when the mocked command fails"""

    # Make the injected mock raise as if the external command had failed.
    command_failure = subprocess.CalledProcessError(-1, "command", output=b'output')
    check_output_mock.side_effect = command_failure

    analyzer = Cloc()
    target_file = os.path.join(self.tmp_data_path, ANALYZER_TEST_FILE)

    with self.assertRaises(GraalError):
        analyzer.analyze(file_path=target_file)
class FileAnalyzer:
    """Class to analyse the content of files"""

    # Extensions for which the Lizard analysis is executed.
    ALLOWED_EXTENSIONS = ['java', 'py', 'php', 'scala', 'js', 'rb', 'cs', 'cpp', 'c', 'lua', 'go', 'swift']
    # Not referenced inside this class.
    FORBIDDEN_EXTENSIONS = ['tar', 'bz2', "gz", "lz", "apk", "tbz2", "lzma", "tlz", "war", "xar", "zip", "zipx"]

    def __init__(self, details=False, kind=LIZARD_FILE):
        self.details = details
        self.kind = kind

        # The LIZARD_FILE kind combines Cloc and Lizard; any other kind
        # relies on SCC alone.
        if self.kind == LIZARD_FILE:
            self.cloc = Cloc()
            self.lizard = Lizard()
        else:
            self.scc = SCC()

    def analyze(self, file_path):
        """Analyze the content of a file using CLOC and Lizard, or SCC

        :param file_path: file path

        :returns a dict containing the results of the analysis, like the one below
        {
          'blanks': ..,
          'comments': ..,
          'loc': ..,
          'ccn': ..,
          'avg_ccn': ..,
          'avg_loc': ..,
          'avg_tokens': ..,
          'num_funs': ..,
          'tokens': ..,
          'funs': [..]
        }
        """
        if self.kind != LIZARD_FILE:
            return self.scc.analyze(file_path=file_path)

        cloc_result = self.cloc.analyze(file_path=file_path)

        # Files whose extension is not in ALLOWED_EXTENSIONS only get the
        # CLOC result.
        if GraalRepository.extension(file_path) not in self.ALLOWED_EXTENSIONS:
            return cloc_result

        lizard_result = self.lizard.analyze(file_path=file_path, details=self.details)

        # Lizard's result is kept as the base; only the blank and comment
        # counts are taken from CLOC.
        lizard_result['blanks'] = cloc_result['blanks']
        lizard_result['comments'] = cloc_result['comments']

        return lizard_result
def test_analyze(self):
    """Test whether cloc returns the expected fields data"""

    cloc = Cloc()
    kwargs = {'file_path': os.path.join(self.tmp_data_path, ANALYZER_TEST_FILE)}
    result = cloc.analyze(**kwargs)

    # assertIsInstance is used because assertTrue(type(x), int) treats
    # `int` as the failure message and can never fail.
    for field in ('blanks', 'comments', 'loc'):
        self.assertIn(field, result)
        self.assertIsInstance(result[field], int)
def test_analyze_repository_level(self):
    """Test whether cloc returns the expected fields data for repository level"""

    cloc = Cloc()
    kwargs = {
        'file_path': self.origin_path,
        'repository_level': True
    }
    results = cloc.analyze(**kwargs)

    # Only the first entry of the returned mapping is inspected.
    result = results[next(iter(results))]

    # assertIsInstance is used because assertTrue(type(x), int) treats
    # `int` as the failure message and can never fail.
    for field in ('blanks', 'comments', 'loc', 'total_files'):
        self.assertIn(field, result)
        self.assertIsInstance(result[field], int)
def __analyze_repository(self, repository_path, files_affected, details):
    """Add code complexity information for a given repository using Lizard and CLOC.

    Current information includes cyclomatic complexity (ccn),
    lines of code, number of functions, tokens, blanks and comments.

    :param repository_path: repository path
    :param files_affected: collection of repository-relative file paths; a
        file found in it is flagged with `in_commit` set to True
    :param details: if True, it returns fine-grained results
        (currently unused, see the TODO below)

    :returns result: list of the results of the analysis, one dict per file
    """
    analysis_result = []

    repository_analysis = lizard.analyze(
        paths=[repository_path],
        threads=1,
        exts=lizard.get_extensions([]),
    )
    cloc = Cloc()

    # Only a leading "<repository_path>/" is removed from the file names.
    # str.replace would also drop later occurrences of the same fragment
    # (e.g. "src/lib/src/x.py" analysed from "src").
    prefix = repository_path + "/"

    for analysis in repository_analysis:
        cloc_analysis = cloc.analyze(file_path=analysis.filename)

        file_path = analysis.filename
        if file_path.startswith(prefix):
            file_path = file_path[len(prefix):]

        result = {
            'loc': analysis.nloc,
            'ccn': analysis.CCN,
            'tokens': analysis.token_count,
            'num_funs': len(analysis.function_list),
            'file_path': file_path,
            'in_commit': file_path in files_affected,
            'blanks': cloc_analysis['blanks'],
            'comments': cloc_analysis['comments']
        }
        analysis_result.append(result)

    # TODO: implement details option

    return analysis_result