def get_default_metrics(config):
    """
    Get the default metrics for a configuration.

    :param config: The configuration
    :type config: :class:`wily.config.WilyConfig`

    :return: Return the list of default metrics in this index
    :rtype: ``list`` of ``str``
    """
    archivers = list_archivers(config)
    default_metrics = []

    for archiver in archivers:
        index = get_archiver_index(config, archiver)

        if len(index) == 0:
            logger.warning("No records found in the index, no metrics available")
            return []

        operators = index[0]["operators"]

        for operator in operators:
            o = resolve_operator(operator)
            if o.cls.default_metric_index is not None:
                metric = o.cls.metrics[o.cls.default_metric_index]
                default_metrics.append("{0}.{1}".format(o.cls.name, metric.name))

    return default_metrics
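
# Hedged usage sketch (illustrative only, not part of the module): the helper
# above returns fully-qualified metric names in the form "<operator>.<metric>",
# e.g. "raw.loc" or "cyclomatic.complexity". Callers typically split on the
# first "." to recover the operator and metric parts, mirroring what diff()
# does below. The sample list here is hypothetical example data.
def _split_default_metrics_example():
    sample = ["raw.loc", "cyclomatic.complexity"]  # shape of get_default_metrics() output
    return [tuple(name.split(".", 1)) for name in sample]
    # -> [("raw", "loc"), ("cyclomatic", "complexity")]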
def diff(config, files, metrics, changes_only=True, detail=True):
    """
    Show the differences in metrics for each of the files.

    :param config: The wily configuration
    :type config: :namedtuple:`wily.config.WilyConfig`

    :param files: The files to compare.
    :type files: ``list`` of ``str``

    :param metrics: The metrics to measure.
    :type metrics: ``list`` of ``str``

    :param changes_only: Only include changed files in the output.
    :type changes_only: ``bool``

    :param detail: Show details (function-level)
    :type detail: ``bool``
    """
    config.targets = files
    files = list(files)
    state = State(config)
    last_revision = state.index[state.default_archiver].revisions[0]

    # Convert the list of metrics to a list of metric instances
    operators = {resolve_operator(metric.split(".")[0]) for metric in metrics}
    metrics = [(metric.split(".")[0], resolve_metric(metric)) for metric in metrics]
    data = {}
    results = []

    # Build a set of operators
    _operators = [operator.cls(config) for operator in operators]

    cwd = os.getcwd()
    os.chdir(config.path)
    for operator in _operators:
        logger.debug(f"Running {operator.name} operator")
        data[operator.name] = operator.run(None, config)
    os.chdir(cwd)

    # Write a summary table
    extra = []
    for operator, metric in metrics:
        if detail and resolve_operator(operator).level == OperatorLevel.Object:
            for file in files:
                try:
                    extra.extend(
                        [
                            f"{file}:{k}"
                            for k in data[operator][file]["detailed"].keys()
                            if k != metric.name
                            and isinstance(data[operator][file]["detailed"][k], dict)
                        ]
                    )
                except KeyError:
                    logger.debug(f"File {file} not in cache")
                    logger.debug("Cache follows -- ")
                    logger.debug(data[operator])
    files.extend(extra)
    logger.debug(files)

    for file in files:
        metrics_data = []
        has_changes = False
        for operator, metric in metrics:
            try:
                current = last_revision.get(
                    config, state.default_archiver, operator, file, metric.name
                )
            except KeyError:
                current = "-"
            try:
                new = get_metric(data, operator, file, metric.name)
            except KeyError:
                new = "-"
            if new != current:
                has_changes = True
            if metric.type in (int, float) and new != "-" and current != "-":
                if current > new:
                    metrics_data.append(
                        "{0:n} -> \u001b[{2}m{1:n}\u001b[0m".format(
                            current, new, BAD_COLORS[metric.measure]
                        )
                    )
                elif current < new:
                    metrics_data.append(
                        "{0:n} -> \u001b[{2}m{1:n}\u001b[0m".format(
                            current, new, GOOD_COLORS[metric.measure]
                        )
                    )
                else:
                    metrics_data.append("{0:n} -> {1:n}".format(current, new))
            else:
                if current == "-" and new == "-":
                    metrics_data.append("-")
                else:
                    metrics_data.append("{0} -> {1}".format(current, new))
        if has_changes or not changes_only:
            results.append((file, *metrics_data))
        else:
            logger.debug(metrics_data)

    descriptions = [metric.description for operator, metric in metrics]
    headers = ("File", *descriptions)
    if len(results) > 0:
        print(
            tabulate.tabulate(
                headers=headers, tabular_data=results, tablefmt=DEFAULT_GRID_STYLE
            )
        )
def diff(config, files, metrics, changes_only=True, detail=True, revision=None):
    """
    Show the differences in metrics for each of the files.

    :param config: The wily configuration
    :type config: :namedtuple:`wily.config.WilyConfig`

    :param files: The files to compare.
    :type files: ``list`` of ``str``

    :param metrics: The metrics to measure.
    :type metrics: ``list`` of ``str``

    :param changes_only: Only include changed files in the output.
    :type changes_only: ``bool``

    :param detail: Show details (function-level)
    :type detail: ``bool``

    :param revision: Compare with specific revision
    :type revision: ``str``
    """
    config.targets = files
    files = list(files)
    state = State(config)

    # Resolve target paths when the cli has specified --path
    if config.path != DEFAULT_PATH:
        targets = [str(Path(config.path) / Path(file)) for file in files]
    else:
        targets = files

    # Expand directories to paths
    files = [
        os.path.relpath(fn, config.path)
        for fn in radon.cli.harvest.iter_filenames(targets)
    ]
    logger.debug(f"Targeting - {files}")

    if not revision:
        target_revision = state.index[state.default_archiver].last_revision
    else:
        rev = resolve_archiver(state.default_archiver).cls(config).find(revision)
        logger.debug(f"Resolved {revision} to {rev.key} ({rev.message})")
        try:
            target_revision = state.index[state.default_archiver][rev.key]
        except KeyError:
            logger.error(
                f"Revision {revision} is not in the cache, make sure you have run wily build."
            )
            exit(1)

    logger.info(
        f"Comparing current with {format_revision(target_revision.revision.key)} "
        f"by {target_revision.revision.author_name} on "
        f"{format_date(target_revision.revision.date)}."
    )

    # Convert the list of metrics to a list of metric instances
    operators = {resolve_operator(metric.split(".")[0]) for metric in metrics}
    metrics = [(metric.split(".")[0], resolve_metric(metric)) for metric in metrics]
    results = []

    # Run each operator over the working copy in a separate process
    with multiprocessing.Pool(processes=len(operators)) as pool:
        operator_exec_out = pool.starmap(
            run_operator, [(operator, None, config, targets) for operator in operators]
        )
    data = {}
    for operator_name, result in operator_exec_out:
        data[operator_name] = result

    # Write a summary table
    extra = []
    for operator, metric in metrics:
        if detail and resolve_operator(operator).level == OperatorLevel.Object:
            for file in files:
                try:
                    extra.extend(
                        [
                            f"{file}:{k}"
                            for k in data[operator][file]["detailed"].keys()
                            if k != metric.name
                            and isinstance(data[operator][file]["detailed"][k], dict)
                        ]
                    )
                except KeyError:
                    logger.debug(f"File {file} not in cache")
                    logger.debug("Cache follows -- ")
                    logger.debug(data[operator])
    files.extend(extra)
    logger.debug(files)

    for file in files:
        metrics_data = []
        has_changes = False
        for operator, metric in metrics:
            try:
                current = target_revision.get(
                    config, state.default_archiver, operator, file, metric.name
                )
            except KeyError:
                current = "-"
            try:
                new = get_metric(data, operator, file, metric.name)
            except KeyError:
                new = "-"
            if new != current:
                has_changes = True
            if metric.type in (int, float) and new != "-" and current != "-":
                if current > new:
                    metrics_data.append(
                        "{0:n} -> \u001b[{2}m{1:n}\u001b[0m".format(
                            current, new, BAD_COLORS[metric.measure]
                        )
                    )
                elif current < new:
                    metrics_data.append(
                        "{0:n} -> \u001b[{2}m{1:n}\u001b[0m".format(
                            current, new, GOOD_COLORS[metric.measure]
                        )
                    )
                else:
                    metrics_data.append("{0:n} -> {1:n}".format(current, new))
            else:
                if current == "-" and new == "-":
                    metrics_data.append("-")
                else:
                    metrics_data.append("{0} -> {1}".format(current, new))
        if has_changes or not changes_only:
            results.append((file, *metrics_data))
        else:
            logger.debug(metrics_data)

    descriptions = [metric.description for operator, metric in metrics]
    headers = ("File", *descriptions)
    if len(results) > 0:
        print(
            tabulate.tabulate(
                headers=headers, tabular_data=results, tablefmt=DEFAULT_GRID_STYLE
            )
        )
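
# Hedged usage sketch (illustrative only, not part of the module): diff() is
# normally driven by the CLI, roughly as in the helper below. The file path and
# metric name are hypothetical; metric names follow the "<operator>.<metric>"
# convention used throughout this module, and `config` is a loaded WilyConfig.
def _example_diff_invocation(config):
    """Illustrative only: compare one metric for one file against the last revision."""
    diff(
        config,
        files=["src/app.py"],              # hypothetical target file
        metrics=["cyclomatic.complexity"],  # "<operator>.<metric>" convention
        changes_only=True,
        detail=False,
        revision=None,                      # None -> compare with the last indexed revision
    )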
def build(config, archiver, operators):
    """
    Build the history given an archiver and collection of operators.

    :param config: The wily configuration
    :type config: :namedtuple:`wily.config.WilyConfig`

    :param archiver: The archiver to use
    :type archiver: :namedtuple:`wily.archivers.Archiver`

    :param operators: The list of operators to execute
    :type operators: `list` of :namedtuple:`wily.operators.Operator`
    """
    try:
        logger.debug(f"Using {archiver.name} archiver module")
        archiver = archiver.cls(config)
        revisions = archiver.revisions(config.path, config.max_revisions)
    except InvalidGitRepositoryError:
        # TODO: This logic shouldn't really be here (SoC)
        logger.info("Defaulting back to the filesystem archiver, not a valid git repo")
        archiver = FilesystemArchiver(config)
        revisions = archiver.revisions(config.path, config.max_revisions)
    except Exception as e:
        if hasattr(e, "message"):
            logger.error(f"Failed to setup archiver: '{e.message}'")
        else:
            logger.error(f"Failed to setup archiver: '{type(e)} - {e}'")
        exit(1)

    state = State(config, archiver=archiver)
    # Check for existence of cache, else provision
    state.ensure_exists()

    index = state.index[archiver.name]

    # remove existing revisions from the list
    revisions = [revision for revision in revisions if revision not in index][::-1]

    logger.info(
        f"Found {len(revisions)} revisions from '{archiver.name}' archiver in '{config.path}'."
    )

    _op_desc = ",".join([operator.name for operator in operators])
    logger.info(f"Running operators - {_op_desc}")

    bar = Bar("Processing", max=len(revisions) * len(operators))
    state.operators = operators

    # Index all files the first time, only scan changes afterward
    seed = True
    prev_roots = None
    try:
        with multiprocessing.Pool(processes=len(operators)) as pool:
            for revision in revisions:
                # Checkout target revision
                archiver.checkout(revision, config.checkout_options)
                stats = {"operator_data": {}}

                if seed:
                    targets = config.targets
                else:
                    # Only target changed files
                    # TODO : Check that changed files are children of the targets
                    targets = [
                        str(pathlib.Path(config.path) / pathlib.Path(file))
                        for file in revision.files
                        # if any([True for target in config.targets if
                        #         target in pathlib.Path(pathlib.Path(config.path) / pathlib.Path(file)).parents])
                    ]

                # Run each operator as a separate process
                data = pool.starmap(
                    run_operator,
                    [(operator, revision, config, targets) for operator in operators],
                )
                # data is a list of tuples, one per operator. Each tuple has length 2;
                # the second element, data[i][1], holds the collected data.
                operator_data_len = 2
                for i in range(0, len(operators)):
                    if (
                        i < len(data)
                        and len(data[i]) >= operator_data_len
                        and len(data[i][1]) == 0
                    ):
                        logger.warning(
                            f"In revision {revision.key}, for operator {operators[i].name}: No data collected"
                        )

                # Map the data back into a dictionary
                for operator_name, result in data:
                    # find all unique directories in the results
                    roots = {pathlib.Path(entry).parents[0] for entry in result.keys()}
                    indices = set(result.keys())

                    # For a seed run, there is no previous change set, so use current
                    if seed:
                        prev_roots = roots
                        prev_indices = indices
                    roots = prev_roots | roots

                    # Copy the results of any unchanged files from the previous revision
                    if not seed:
                        missing_indices = prev_indices - indices
                        # TODO: Check existence of file path.
                        for missing in missing_indices:
                            # Don't copy aggregate keys as their values may have changed
                            if missing in roots:
                                continue
                            # previous index may not have that operator
                            if operator_name not in prev_stats["operator_data"]:
                                continue
                            # previous index may not have file either
                            if missing not in prev_stats["operator_data"][operator_name]:
                                continue
                            result[missing] = prev_stats["operator_data"][operator_name][
                                missing
                            ]

                    # Aggregate metrics across all root paths using the aggregate function in the metric
                    for root in roots:
                        # find all matching entries recursively
                        aggregates = [
                            path
                            for path in result.keys()
                            if root in pathlib.Path(path).parents
                        ]
                        result[str(root)] = {"total": {}}
                        # aggregate values
                        for metric in resolve_operator(operator_name).cls.metrics:
                            func = metric.aggregate
                            values = [
                                result[aggregate]["total"][metric.name]
                                for aggregate in aggregates
                                if aggregate in result
                                and metric.name in result[aggregate]["total"]
                            ]
                            if len(values) > 0:
                                result[str(root)]["total"][metric.name] = func(values)

                    prev_indices = set(result.keys())
                    prev_roots = roots
                    stats["operator_data"][operator_name] = result
                    bar.next()

                prev_stats = stats
                seed = False
                ir = index.add(revision, operators=operators)
                ir.store(config, archiver, stats)
        index.save()
        bar.finish()
    except Exception as e:
        logger.error(f"Failed to build cache: {type(e)}: '{e}'")
        raise e
    finally:
        # Reset the archive after every run back to the head of the branch
        archiver.finish()
def build(config, archiver, operators):
    """
    Build the history given an archiver and collection of operators.

    :param config: The wily configuration
    :type config: :namedtuple:`wily.config.WilyConfig`

    :param archiver: The archiver to use
    :type archiver: :namedtuple:`wily.archivers.Archiver`

    :param operators: The list of operators to execute
    :type operators: `list` of :namedtuple:`wily.operators.Operator`
    """
    try:
        logger.debug(f"Using {archiver.name} archiver module")
        archiver = archiver.cls(config)
        revisions = archiver.revisions(config.path, config.max_revisions)
    except InvalidGitRepositoryError:
        # TODO: This logic shouldn't really be here (SoC)
        logger.info("Defaulting back to the filesystem archiver, not a valid git repo")
        archiver = FilesystemArchiver(config)
        revisions = archiver.revisions(config.path, config.max_revisions)
    except Exception as e:
        if hasattr(e, "message"):
            logger.error(f"Failed to setup archiver: '{e.message}'")
        else:
            logger.error(f"Failed to setup archiver: '{type(e)} - {e}'")
        exit(1)

    state = State(config, archiver=archiver)
    # Check for existence of cache, else provision
    state.ensure_exists()

    index = state.index[archiver.name]

    # remove existing revisions from the list
    revisions = [revision for revision in revisions if revision not in index]

    logger.info(
        f"Found {len(revisions)} revisions from '{archiver.name}' archiver in '{config.path}'."
    )

    _op_desc = ",".join([operator.name for operator in operators])
    logger.info(f"Running operators - {_op_desc}")

    bar = Bar("Processing", max=len(revisions) * len(operators))
    state.operators = operators
    try:
        with multiprocessing.Pool(processes=len(operators)) as pool:
            for revision in revisions:
                # Checkout target revision
                archiver.checkout(revision, config.checkout_options)
                stats = {"operator_data": {}}

                # Run each operator as a separate process
                data = pool.starmap(
                    run_operator,
                    [(operator, revision, config) for operator in operators],
                )

                # Map the data back into a dictionary
                for operator_name, result in data:
                    # aggregate values to directories
                    roots = []

                    # find all unique directories in the results
                    for entry in result.keys():
                        parent = pathlib.Path(entry).parents[0]
                        if parent not in roots:
                            roots.append(parent)

                    for root in roots:
                        # find all matching entries recursively
                        aggregates = [
                            path
                            for path in result.keys()
                            if root in pathlib.Path(path).parents
                        ]
                        result[str(root)] = {"total": {}}
                        # aggregate values
                        for metric in resolve_operator(operator_name).cls.metrics:
                            func = metric.aggregate
                            values = [
                                result[aggregate]["total"][metric.name]
                                for aggregate in aggregates
                                if aggregate in result
                                and metric.name in result[aggregate]["total"]
                            ]
                            if len(values) > 0:
                                result[str(root)]["total"][metric.name] = func(values)

                    stats["operator_data"][operator_name] = result
                    bar.next()

                ir = index.add(revision, operators=operators)
                ir.store(config, archiver, stats)
        index.save()
        bar.finish()
    except Exception as e:
        logger.error(f"Failed to build cache: '{e}'")
        raise e
    finally:
        # Reset the archive after every run back to the head of the branch
        archiver.finish()
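
# Hedged sketch (illustrative only, not part of the module): the directory
# aggregation step in build() rolls per-file totals up to each parent directory
# using the metric's aggregate function (e.g. sum for lines of code). The data
# below is hypothetical and only demonstrates the shape of an operator `result`.
def _aggregate_example():
    import pathlib

    result = {
        "pkg/a.py": {"total": {"loc": 10}},
        "pkg/b.py": {"total": {"loc": 30}},
    }
    file_paths = list(result)
    roots = {pathlib.Path(path).parents[0] for path in file_paths}
    for root in roots:
        children = [p for p in file_paths if root in pathlib.Path(p).parents]
        result[str(root)] = {
            "total": {"loc": sum(result[c]["total"]["loc"] for c in children)}
        }
    return result  # adds {"pkg": {"total": {"loc": 40}}} alongside the per-file entries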