def test_argparse_no_patch(self):
    # With patch=False, add_logging_args must leave parser.parse_args untouched.
    parser = argparse.ArgumentParser()
    method = parser.parse_args
    slogging.add_logging_args(parser, False)
    self.assertEqual(method, parser.parse_args)
    # With the default patch=True, parse_args is replaced with a wrapper.
    parser = argparse.ArgumentParser()
    method = parser.parse_args
    slogging.add_logging_args(parser)
    self.assertNotEqual(method, parser.parse_args)
def run_slogging_main():
    # Build a parser with the logging arguments and replace slogging.setup with a stub,
    # so the patched parse_args prints the values it would normally pass to setup().
    parser = argparse.ArgumentParser()
    slogging.add_logging_args(parser)

    def my_setup(level: Union[str, int], structured: bool, config_path: str):
        print(level)
        print(structured)
        print(config_path)

    slogging.setup = my_setup
    parser.parse_args()
def test_argparse_erase_args(self):
    # erase_args=True must remove the log_* attributes from the parsed namespace.
    log_args = {"log_level", "log_structured", "log_config"}
    parser = argparse.ArgumentParser()
    slogging.add_logging_args(parser, erase_args=True)
    with patch.object(sys, "argv"):
        with patch("modelforge.slogging.setup"):
            def my_setup(*args):
                pass

            slogging.setup = my_setup
            args = parser.parse_args()
    self.assertEqual(len(log_args.intersection(vars(args))), 0)
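# Illustrative sketch (not part of the original sources): a minimal consumer of
# modelforge.slogging based on the behaviour exercised by the tests above. It assumes that
# add_logging_args registers options whose destinations are log_level, log_structured and
# log_config, and that the patched parse_args calls slogging.setup(level, structured,
# config_path) automatically after parsing.
import argparse

from modelforge import slogging


def example_cli():
    parser = argparse.ArgumentParser(description="Example slogging consumer (hypothetical).")
    # patch=True wraps parser.parse_args so logging is configured right after parsing;
    # erase_args=True strips the log_* attributes from the returned namespace.
    slogging.add_logging_args(parser, patch=True, erase_args=True)
    parser.add_argument("--name", default="world")
    args = parser.parse_args()
    # Logging is already set up here and args carries no log_* attributes.
    print("hello,", args.name)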
def create_parser() -> ArgumentParser:
    """
    Create a parser for the lookout.style.format utility.

    :return: an ArgumentParser with a handler defined in the handler attribute.
    """
    # Deferred imports to speed up loading __init__
    from lookout.style.format.benchmarks.compare_quality_reports import \
        compare_quality_reports_entry
    from lookout.style.format.benchmarks.evaluate_smoke import evaluate_smoke_entry
    from lookout.style.format.benchmarks.generate_smoke import generate_smoke_entry
    from lookout.style.format.benchmarks.quality_report import generate_quality_report
    from lookout.style.format.benchmarks.general_report import print_reports
    from lookout.style.format.benchmarks.quality_report_noisy import quality_report_noisy
    from lookout.style.format.benchmarks.expected_vnodes_number import \
        calc_expected_vnodes_number_entry

    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatterNoNone)
    # General options
    slogging.add_logging_args(parser)
    subparsers = parser.add_subparsers(help="Commands")

    def add_parser(name, help):
        return subparsers.add_parser(
            name, help=help, formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # Evaluation
    eval_parser = add_parser("eval", "Evaluate the trained model on the given dataset.")
    eval_parser.set_defaults(handler=print_reports)
    add_input_pattern_arg(eval_parser)
    add_bblfsh_arg(eval_parser)
    add_model_args(eval_parser)
    eval_parser.add_argument(
        "-n", "--n-files", default=0, type=int,
        help="How many files with the most mispredictions to show. If n <= 0, show all.")

    # Generate a quality report for the given data
    quality_report_parser = add_parser(
        "quality-report", "Generate a quality report on the given data.")
    quality_report_parser.set_defaults(handler=generate_quality_report)
    quality_report_parser.add_argument(
        "-i", "--input", required=True,
        help="CSV file with the repositories to report on. "
             "Should contain url, to and from columns.")
    quality_report_parser.add_argument(
        "-o", "--output", required=True, help="Directory where to save the results.")
    quality_report_parser.add_argument(
        "-f", "--force", default=False, action="store_true",
        help="Overwrite results stored in the output directory if set. "
             "Stored results are reused otherwise.")
    quality_report_parser.add_argument("-b", "--bblfsh", help="Babelfish address to use.")
    quality_report_parser.add_argument(
        "--config", type=json.loads, default="{}",
        help="Config for the analyzer in JSON format.")
    quality_report_parser.add_argument(
        "--database", default=None,
        help="sqlite3 database path to store the models. "
             "A temporary file is used if not set.")
    quality_report_parser.add_argument(
        "--fs", default=None,
        help="Model repository file system root. "
             "A temporary directory is used if not set.")

    # Generate the quality report based on the artificial noisy dataset
    quality_report_noisy_parser = add_parser(
        "quality-report-noisy", "Quality report on the artificial noisy dataset.")
    quality_report_noisy_parser.set_defaults(handler=quality_report_noisy)
    add_bblfsh_arg(quality_report_noisy_parser)
    add_rules_thresholds(quality_report_noisy_parser)
    quality_report_noisy_parser.add_argument(
        "-l", "--language", default="javascript", help="Programming language to use.")
    quality_report_noisy_parser.add_argument(
        "--repos", type=str,
        help="List of URLs or paths to the repositories to analyze. "
             "Should be strings separated by newlines.")
    quality_report_noisy_parser.add_argument(
        "--precision-threshold", type=float, default=0.95,
        help="Precision threshold tolerated for the model.")
    quality_report_noisy_parser.add_argument(
        "-o", "--dir-output", required=True, type=str,
        help="Path to the output directory where to store the quality report and the "
             "precision-recall curve.")

    # Compare two quality report summaries
    compare_quality_parser = add_parser(
        "compare-quality",
        "Create a file with the differences in quality metrics between two reports.")
    compare_quality_parser.set_defaults(handler=compare_quality_reports_entry)
    compare_quality_parser.add_argument(
        "--base", type=str, required=True,
        help="Baseline report. Usually the latest report from the ./report/ directory.")
    compare_quality_parser.add_argument(
        "--new", type=str, required=True,
        help="New report. Usually a report generated for master or any local change "
             "you made and want to validate.")
    compare_quality_parser.add_argument(
        "-o", "--output", type=str, required=True,
        help="Path to the file to save the result, or - to print to stdout.")

    # Generate a dataset of different styles in code for smoke testing.
    gen_smoke_parser = add_parser(
        "gen-smoke-dataset",
        "Generate a dataset with different styles. "
        "Helps to check the basic system functionality. "
        "Only JavaScript code is supported now.")
    gen_smoke_parser.set_defaults(handler=generate_smoke_entry)
    gen_smoke_parser.add_argument(
        "inputpath", type=str,
        help="Path to the tar.xz archive containing the initial repositories.")
    gen_smoke_parser.add_argument(
        "outputpath", type=str,
        help="Path to the directory where the generated dataset should be stored.")
    gen_smoke_parser.add_argument(
        "--force", default=False, action="store_true",
        help="Overwrite the output directory if it exists.")

    # Evaluate on the different styles dataset
    eval_smoke_parser = add_parser(
        "eval-smoke-dataset", "Evaluate on the dataset with different styles.")
    eval_smoke_parser.set_defaults(handler=evaluate_smoke_entry)
    eval_smoke_parser.add_argument(
        "inputpath", type=str,
        help="Path to the directory where the generated dataset is stored. "
             "To generate a dataset, run the gen-smoke-dataset command.")
    eval_smoke_parser.add_argument(
        "reportdir", type=str, help="Path to the report performance output directory.")
    eval_smoke_parser.add_argument("--bblfsh", help="Babelfish server's address.")
    eval_smoke_parser.add_argument(
        "--config", type=json.loads, default="{}", help="JSON config for FormatAnalyzer.")
    eval_smoke_parser.add_argument(
        "--database", type=str, default=None,
        help="Path to the sqlite3 database with trained models metadata. "
             "Enables reusing previously trained models.")

    rule_parser = add_parser("rule", "Print the rule description by its hash.")
    rule_parser.set_defaults(handler=dump_rule_entry)
    rule_parser.add_argument("model", help="Path to the model file.")
    rule_parser.add_argument("hash", help="Hash of the rule (8 chars).")

    # FIXME(zurk): remove when https://github.com/src-d/style-analyzer/issues/557 is resolved
    calc_expected_vnodes = add_parser(
        "calc-expected-vnodes-number",
        "Write the CSV file with the expected numbers of virtual nodes extracted from "
        "repositories. Required for quality report generation. It is a workaround for "
        "https://github.com/src-d/style-analyzer/issues/557. "
        "The Docker service is required to be running.")
    calc_expected_vnodes.set_defaults(handler=calc_expected_vnodes_number_entry)
    calc_expected_vnodes.add_argument(
        "-i", "--input", required=True,
        help="CSV file with the repositories for the quality report. "
             "Should contain url, to and from columns.")
    calc_expected_vnodes.add_argument(
        "-o", "--output", required=True, help="Path to an output CSV file.")
    calc_expected_vnodes.add_argument(
        "-r", "--runs", default=3,
        help="Number of repeats to ensure the correctness of the result.")
    return parser
def parse_args() -> argparse.Namespace:
    """Create the command-line argument parser and parse the arguments."""
    parser = argparse.ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatterNoNone)
    slogging.add_logging_args(parser, patch=True, erase_args=False)
    # Create and construct subparsers
    subparsers = parser.add_subparsers(help="Commands", dest="command")

    def add_parser(name, help_message):
        return subparsers.add_parser(
            name, help=help_message, formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # --------------------------------------------------------------------------------------------
    clickhouse2deps_parser = add_parser(
        "clickhouse2deps", "Extract dependencies from a ClickHouse DB.")
    clickhouse2deps_parser.set_defaults(handler=clickhouse2deps)
    clickhouse2deps_parser.add_argument(
        "-o", "--output-path", type=Path,
        help="Output path of the resulting ASDF model with the extracted dependencies.")
    clickhouse2deps_parser.add_argument(
        "-f", "--force", action="store_true",
        help="Overwrite the existing ASDF model specified by -o/--output-path.")
    clickhouse2deps_parser.add_argument("--user", default="default", help="Username for the DB.")
    clickhouse2deps_parser.add_argument("--password", default="", help="Password for the DB.")
    clickhouse2deps_parser.add_argument("--host", default="0.0.0.0", help="Host for the DB.")
    clickhouse2deps_parser.add_argument("--port", default=9000, type=int, help="Port for the DB.")
    clickhouse2deps_parser.add_argument("--database", default="default",
                                        help="Database name for the DB.")
    clickhouse2deps_parser.add_argument("--table", default="uasts", help="Table name for the DB.")
    clickhouse2deps_parser.add_argument(
        "--langs", nargs="+", default=CLICKHOUSE_LANGS, choices=CLICKHOUSE_LANGS,
        help="Languages to consider while extracting dependencies.")
    # --------------------------------------------------------------------------------------------
    collect_stdlibs_parser = add_parser(
        "collect-stdlibs",
        "Collect the lists of standard libraries for each language Babelfish can parse.")
    collect_stdlibs_parser.set_defaults(handler=collect_stdlibs)
    collect_stdlibs_parser.add_argument(
        "-o", "--output-path", type=Path,
        help="Output path of the resulting ASDF model with the extracted standard libraries.")
    collect_stdlibs_parser.add_argument(
        "-f", "--force", action="store_true",
        help="Overwrite the existing ASDF model specified by -o/--output-path.")

    args = parser.parse_args()
    if not hasattr(args, "handler"):
        args.handler = lambda _: parser.print_usage()  # noqa: E731
    return args
def create_parser() -> ArgumentParser:
    """
    Create a parser for the lookout.style.typos utility.

    :return: an ArgumentParser with a handler defined in the handler attribute.
    """
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatterNoNone)
    # General options
    slogging.add_logging_args(parser)
    subparsers = parser.add_subparsers(help="Commands")

    def add_parser(name, help):
        return subparsers.add_parser(
            name, help=help, formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # Prepare raw data for the corrector
    prepare_parser = add_parser("prepare-data", "Prepare the raw dataset for corrector training.")
    prepare_parser.set_defaults(handler=prepare_data)
    add_config_arg(prepare_parser)

    # Train a new fasttext model on the given data
    fasttext_parser = add_parser(
        "train-fasttext", "Train a fasttext model on the given dataset of code identifiers.")
    fasttext_parser.set_defaults(handler=cli_train_fasttext)
    add_data_path_arg(fasttext_parser)
    add_config_arg(fasttext_parser)

    # Create train and test datasets with artificial typos
    datasets_parser = add_parser(
        "get-datasets", "Create the train and the test datasets of typos.")
    datasets_parser.set_defaults(handler=cli_get_datasets)
    add_data_path_arg(datasets_parser)
    add_config_arg(datasets_parser)

    # Create, train and evaluate a new corrector model
    train_parser = add_parser(
        "train", "Create and train a TyposCorrector model on the given data.")
    train_parser.set_defaults(handler=cli_train_corrector)
    train_parser.add_argument(
        "--train", required=False, type=str,
        default=DEFAULT_CORRECTOR_CONFIG["datasets"]["train_path"],
        help=".csv dump of a DataFrame with the Columns.Split and Columns.Frequency columns.")
    train_parser.add_argument(
        "--test", required=False, type=str,
        default=DEFAULT_CORRECTOR_CONFIG["datasets"]["test_path"],
        help=".csv dump of a DataFrame with the Columns.Split and Columns.Frequency columns.")
    train_parser.add_argument(
        "-v", "--vocabulary-path", required=False, type=str,
        default=os.path.join(DEFAULT_CORRECTOR_CONFIG["preparation"]["data_dir"],
                             DEFAULT_CORRECTOR_CONFIG["preparation"]["vocabulary_filename"]),
        help="Path to a .csv file with the vocabulary.")
    train_parser.add_argument(
        "-f", "--frequencies-path", required=False, type=str,
        default=os.path.join(DEFAULT_CORRECTOR_CONFIG["preparation"]["data_dir"],
                             DEFAULT_CORRECTOR_CONFIG["preparation"]["frequencies_filename"]),
        help="Path to a .csv file with the tokens' frequencies.")
    train_parser.add_argument(
        "-e", "--fasttext-path", required=False, type=str,
        default=DEFAULT_CORRECTOR_CONFIG["fasttext"]["path"],
        help="Path to a FastText model's dump (.bin).")
    add_config_arg(train_parser)
    add_corrector_path_arg(train_parser)

    ########################################
    # One command to rule them all
    ########################################
    train_from_scratch_parser = add_parser(
        "train-from-scratch", "Create and train a TyposCorrector model on the given data.")
    train_from_scratch_parser.set_defaults(handler=train_from_scratch)
    add_config_arg(train_from_scratch_parser)

    # Report for the Typo Commits Dataset
    typo_commits_report_parser = add_parser(
        "typo-commits-report", "Generate the report for the Typo Commits Dataset.")
    typo_commits_report_parser.set_defaults(handler=generate_typos_report_entry)
    add_config_arg(typo_commits_report_parser)
    typo_commits_report_parser.add_argument(
        "-i", "--dataset", required=True,
        help="CSV file with commits with typos. Must contain the wrong_id, correct_id, file, "
             "line, commit_fix, repo and commit_typo columns. An xz-compressed file can also "
             "be specified.")
    typo_commits_report_parser.add_argument(
        "-o", "--output", required=True, help="Directory where to save the results.")
    typo_commits_report_parser.add_argument("-b", "--bblfsh", help="Babelfish address to use.")
    typo_commits_report_parser.add_argument(
        "--database", default=None,
        help="sqlite3 database path to store the models. "
             "A temporary file is used if not set.")
    typo_commits_report_parser.add_argument(
        "--fs", default=None,
        help="Model repository file system root. "
             "A temporary directory is used if not set.")
    typo_commits_report_parser.add_argument(
        "--repos-cache", default=None, required=False,
        help="Directory where to download the repositories from the dataset. It is strongly "
             "recommended to set this parameter if there are more than 20 repositories "
             "in the dataset. A temporary directory is used if not set.")
    return parser
def create_parser() -> ArgumentParser:
    """
    Create a parser for the lookout.style.typos utility.

    :return: an ArgumentParser with a handler defined in the handler attribute.
    """
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatterNoNone)
    # General options
    slogging.add_logging_args(parser)
    subparsers = parser.add_subparsers(help="Commands")

    def add_parser(name, help):
        return subparsers.add_parser(
            name, help=help, formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # Prepare raw data for the corrector
    prepare_parser = add_parser("prepare-data", "Prepare the raw dataset for corrector training.")
    prepare_parser.set_defaults(handler=prepare_data)
    add_config_arg(prepare_parser)

    # Train a new fasttext model on the given data
    fasttext_parser = add_parser(
        "train-fasttext", "Train a fasttext model on the given dataset of code identifiers.")
    fasttext_parser.set_defaults(handler=cli_train_fasttext)
    add_data_path_arg(fasttext_parser)
    add_config_arg(fasttext_parser)

    # Create train and test datasets with artificial typos
    datasets_parser = add_parser(
        "get-datasets", "Create the train and the test datasets of typos.")
    datasets_parser.set_defaults(handler=cli_get_datasets)
    add_data_path_arg(datasets_parser)
    add_config_arg(datasets_parser)

    # Create, train and evaluate a new corrector model
    train_parser = add_parser(
        "train", "Create and train a TyposCorrector model on the given data.")
    train_parser.set_defaults(handler=cli_train_corrector)
    train_parser.add_argument(
        "--train", required=False, type=str,
        default=DEFAULT_CONFIG["datasets"]["train_path"],
        help=".csv dump of a DataFrame with the Columns.Split and Columns.Frequency columns.")
    train_parser.add_argument(
        "--test", required=False, type=str,
        default=DEFAULT_CONFIG["datasets"]["test_path"],
        help=".csv dump of a DataFrame with the Columns.Split and Columns.Frequency columns.")
    train_parser.add_argument(
        "-v", "--vocabulary-path", required=False, type=str,
        default=os.path.join(DEFAULT_CONFIG["preparation"]["data_dir"],
                             DEFAULT_CONFIG["preparation"]["vocabulary_filename"]),
        help="Path to a .csv file with the vocabulary.")
    train_parser.add_argument(
        "-f", "--frequencies-path", required=False, type=str,
        default=os.path.join(DEFAULT_CONFIG["preparation"]["data_dir"],
                             DEFAULT_CONFIG["preparation"]["frequencies_filename"]),
        help="Path to a .csv file with the tokens' frequencies.")
    train_parser.add_argument(
        "-e", "--fasttext-path", required=False, type=str,
        default=DEFAULT_CONFIG["fasttext"]["path"],
        help="Path to a FastText model's dump (.bin).")
    add_corrector_path_arg(train_parser)

    ########################################
    # One command to rule them all
    ########################################
    train_from_scratch_parser = add_parser(
        "train-from-scratch", "Create and train a TyposCorrector model on the given data.")
    train_from_scratch_parser.set_defaults(handler=train_from_scratch)
    add_config_arg(train_from_scratch_parser)
    return parser
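# Hypothetical driver (an assumption, not shown in this section) illustrating how the parsers
# built by the create_parser() functions above are typically dispatched: every subcommand stores
# its entry point in the "handler" default, and main() forwards the parsed namespace to it.
def main():
    """Parse the command line and run the selected subcommand handler."""
    parser = create_parser()
    args = parser.parse_args()
    try:
        handler = args.handler
    except AttributeError:
        # No subcommand was given, so fall back to printing the usage message.
        return parser.print_usage()
    return handler(args)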