def load_parameters(json_parameters):
    """Build a ``Parameters`` object from a decoded JSON mapping.

    Every field listed below is copied verbatim from ``json_parameters``
    onto a freshly constructed ``Parameters`` instance.

    Args:
        json_parameters: Mapping that contains all of the keys
            ``clustering_threshold``, ``size_threshold``,
            ``distance_threshold``, ``hashing_depth``,
            ``clusterize_using_dcup``, ``clusterize_using_hash``,
            ``report_unifiers``, ``force`` and ``use_diff``.

    Returns:
        The populated ``Parameters`` instance.

    Raises:
        KeyError: If any of the expected keys is missing.
    """
    field_names = (
        'clustering_threshold',
        'size_threshold',
        'distance_threshold',
        'hashing_depth',
        'clusterize_using_dcup',
        'clusterize_using_hash',
        'report_unifiers',
        'force',
        'use_diff',
    )
    parameters = Parameters()
    for field_name in field_names:
        # Plain indexing (not .get) so that a missing key fails loudly.
        setattr(parameters, field_name, json_parameters[field_name])
    # Hand the populated object back; without this the caller receives None.
    return parameters
def scan(language, file_manifest, source_file_names):
    """Parse source files, detect duplicate code and save a report.

    The set of files to scan is the union of ``source_file_names`` and the
    contents of ``file_manifest``. When neither is supplied, the default
    manifest for ``language`` is used. Files that fail to parse are logged
    and skipped. The finished report is passed to ``save_report`` together
    with the name ".orphanblack".

    Args:
        language: Key into ``ast_suppliers.abstract_syntax_tree_suppliers``
            selecting the supplier used to parse each file.
        file_manifest: Manifest handed to ``manifest.contents``, or ``None``.
        source_file_names: Iterable of explicit file names (may be empty).

    Raises:
        KeyError: If ``language`` has no registered supplier.
    """
    # Determine the files to scan. If no files are given, use a default manifest.
    if not source_file_names and file_manifest is None:
        file_manifest = manifest.default_manifest(language)

    source_file_names = set(source_file_names)
    if file_manifest is not None:
        source_file_names.update(manifest.contents(file_manifest))

    supplier = ast_suppliers.abstract_syntax_tree_suppliers[language]

    # TODO: Configuration files!
    parameters = Parameters()
    parameters.distance_threshold = supplier.distance_threshold
    parameters.size_threshold = supplier.size_threshold

    source_files = []
    report = Report(parameters)

    def parse_file(file_name):
        """Parse one file and register it; log and skip it on failure."""
        try:
            logging.info('Parsing ' + file_name + '...')
            source_file = supplier(file_name, parameters)
            tree = source_file.getTree()
            tree.propagateCoveredLineNumbers()
            tree.propagateHeight()
            source_files.append(source_file)
            report.addFileName(file_name)
            logging.info('done')
        except Exception:
            # Best effort: one unparsable file must not abort the whole scan.
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate and the user can cancel the scan.
            logging.warning('Can\'t parse "%s" \n: %s',
                            file_name, traceback.format_exc())

    for file_name in source_file_names:
        parse_file(file_name)

    duplicates = clone_detection_algorithm.findDuplicateCode(source_files, report)

    for number, duplicate in enumerate(duplicates, 1):
        distance = duplicate.calcDistance()
        # TODO: This is a mess! Most of this info should be assembled on the
        # fly and in member functions.
        snippets = [
            Snippet(
                clone.getSourceFile()._file_name,
                clone.getCoveredLineNumbers(),
                '\n'.join(clone.getSourceLines()),
            )
            for clone in (duplicate[0], duplicate[1])
        ]
        report.addClone(CloneSummary("Clone #" + str(number), snippets, distance))

    report.sortByCloneSize()

    save_report(".orphanblack", report)