Пример #1
0
def setup():
    """
    Parses the command line and enables logging at the INFO level.

    The command line accepts the positional "sequence", the mandatory
    "-o/--output" and the optional integer "--hash-rounds" (10 by default).

    :return: The namespace with the parsed command line arguments.
    """
    cmdline = argparse.ArgumentParser()
    cmdline.add_argument("sequence")
    cmdline.add_argument("-o", "--output", required=True)
    cmdline.add_argument("--hash-rounds", type=int, default=10)
    parsed = cmdline.parse_args()
    setup_logging("INFO")
    return parsed
Пример #2
0
 def test_setup(self):
     """
     Checks that a record logged after setup_logging("INFO") reaches the captured
     `err` stream with the logger name, the message and the "1;36" sequence
     (presumably an ANSI color code - confirm against setup_logging()).
     """
     with captured_output() as (out, err, log):
         root_logger = logging.getLogger()
         # When the root logger has exactly one handler, put an extra stream
         # handler in front of it before configuring the logging.
         if len(root_logger.handlers) == 1:
             root_logger.handlers.insert(0, logging.StreamHandler())
         setup_logging("INFO")
         logging.getLogger("test").info("success")
     captured = err.getvalue()
     for fragment in ("test", "success", "1;36"):
         self.assertIn(fragment, captured)
Пример #3
0
def setup():
    """
    Enables INFO logging and, on the first call only, prepares the enry executable.

    The executable path is stored in the global ENRY and points inside a fresh
    temporary directory. A local "enry" file is copied there if it exists,
    otherwise install_enry() is asked to produce it. Finally,
    ensure_bblfsh_is_running_noexc() is invoked. Later calls only set up logging.
    """
    global ENRY
    setup_logging("INFO")
    if ENRY is None:
        ENRY = os.path.join(tempfile.mkdtemp(), "enry")
        if not os.path.isfile("enry"):
            install_enry(target=ENRY)
        else:
            shutil.copy("enry", ENRY)
        ensure_bblfsh_is_running_noexc()
Пример #4
0
def main():
    """
    Creates all the argument parsers and invokes the function from set_defaults().

    If the parsed arguments carry no "handler", the usage is printed instead.

    :return: The result of the function from set_defaults(), or None when only \
             the usage was printed.
    """
    cli = get_parser()
    options = cli.parse_args()
    # Convert the textual level name to the numeric logging level.
    options.log_level = logging._nameToLevel[options.log_level]
    setup_logging(options.log_level)
    if hasattr(options, "handler"):
        return options.handler(options)
    cli.print_usage()
    return None
Пример #5
0
def initialize(log_level=logging.INFO, enry="./enry"):
    """
    Sets up the working environment once: enables logging, makes sure that the
    Babelfish server is running and installs src-d/enry.

    Repeated calls are no-ops, guarded by the global __initialized__ flag.

    :param log_level: The verbosity level. Can be either an integer (e.g. \
                      logging.INFO) or a string (e.g. "INFO").
    :param enry: The path to the linguist/enry executable. If it exists, nothing \
                 happens. If it does not, src-d/enry is compiled into that file.
    :return: None
    """
    global __initialized__
    if not __initialized__:
        setup_logging(log_level)
        ensure_bblfsh_is_running_noexc()
        install_enry(target=enry, warn_exists=False)
        __initialized__ = True
Пример #6
0
def main():
    """
    Builds the argument parser, configures logging and dispatches to the handler
    registered through set_defaults().

    :return: The result of the function from set_defaults(), or None when no \
             handler is registered and the usage is printed instead.
    """
    arg_parser = get_parser()
    options = arg_parser.parse_args()
    # Translate the level name into its numeric value.
    options.log_level = logging._nameToLevel[options.log_level]
    setup_logging(options.log_level)

    def show_usage(_):
        arg_parser.print_usage()

    # Fall back to printing the usage when no sub-command set a handler.
    command = getattr(options, "handler", show_usage)
    return command(options)
Пример #7
0
    def process_entry(cls, url_or_path: str, args: dict, outdir: str,
                      queue: multiprocessing.Queue, organize_files: int) -> int:
        """
        Invokes process_repo() in a forked child process and reports the outcome.

        The reason we fork is that grpc starts hanging background threads for every
        channel which poll(). Those threads do not exit when the channel is destroyed.
        It is fine for a single repository, but quickly hits the system limit in case
        of many.

        This method is intended for the batch processing.

        :param url_or_path: File system path or a URL to clone.
        :param args: :class:`dict`-like container with the arguments to cls(). \
            The optional "log_level" key is removed from it and passed to setup_logging().
        :param outdir: The output directory.
        :param queue: :class:`multiprocessing.Queue` to report the status; receives \
            a (url_or_path, status) tuple.
        :param organize_files: Perform alphabetical directory indexing of provided level. \
            Expand output path by subfolders using the first n characters of repository, \
            for example for "organize_files=2" file ababa is saved to /a/ab/ababa, abcoasa \
            is saved to /a/bc/abcoasa, etc. (NOTE(review): the second example looks \
            inconsistent with the first one - confirm against prepare_filename().)
        :return: 0 if grpc is already imported and nothing was run, otherwise the \
            status value returned by os.waitpid() for the child process.
        """
        # "log_level" is consumed here so that it is not forwarded to cls(**args).
        if "log_level" in args:
            setup_logging(args.pop("log_level"))
        # Refuse to fork when grpc has already been imported into this process.
        if "grpc" in sys.modules:
            logging.getLogger(cls.__name__).error("grpc detected, fork() is unstable -> aborted")
            # NOTE(review): status 0 is reported although nothing was processed -
            # confirm that the consumers of the queue expect this.
            queue.put((url_or_path, 0))
            return 0
        pid = os.fork()
        if pid == 0:
            # Child process: do the actual work and terminate with its status.
            outfile = cls.prepare_filename(url_or_path, outdir, organize_files)
            status = cls(**args).process_repo(url_or_path, outfile)
            # sys.exit() raises SystemExit, while os._exit() terminates immediately.
            # NOTE(review): presumably sys.exit() lets a forked multiprocessing
            # worker shut down through its regular path - confirm.
            if multiprocessing.get_start_method() == "fork":
                sys.exit(status)
            os._exit(status)
        else:
            # Parent process: block until the child finishes and publish its status.
            # NOTE(review): os.waitpid() returns the encoded wait status rather than
            # the plain exit code; confirm whether the callers expect
            # os.WEXITSTATUS(status) instead.
            _, status = os.waitpid(pid, 0)
            queue.put((url_or_path, status))
            return status
Пример #8
0
 def setUpClass(cls):
     """Configures logging at the DEBUG level."""
     verbosity = "DEBUG"
     setup_logging(verbosity)
Пример #9
0
def setup():
    """Configures logging at the INFO level."""
    verbosity = "INFO"
    setup_logging(verbosity)
Пример #10
0
def setup():
    """
    Enables logging with the INFO verbosity.
    """
    level_name = "INFO"
    setup_logging(level_name)
Пример #11
0
def main():
    """
    Command line entry point of the similar repositories query.

    Parses the command line, configures logging, loads the id2vec, document
    frequencies and BOW models which were specified, queries SimilarRepositories
    with the given input and prints every returned (index, rate) pair.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("input", help="Repository URL or path or name.")
    cli.add_argument("--log-level", default="INFO", choices=logging._nameToLevel,
                     help="Logging verbosity.")

    # Optional model sources; None means "not specified".
    model_sources = (
        ("--id2vec", "id2vec model URL or path."),
        ("--df", "Document frequencies URL or path."),
        ("--bow", "BOW model URL or path."),
    )
    for flag, description in model_sources:
        cli.add_argument(flag, default=None, help=description)

    # Numeric tuning knobs: (flags, default, type, help), registered in this order.
    numeric_options = (
        (("--prune-df",), 20, int,
         "Minimum number of times an identifier must occur in the dataset "
         "to be taken into account."),
        (("--vocabulary-min",), 50, int, "Minimum number of words in a bag."),
        (("--vocabulary-max",), 500, int, "Maximum number of words in a bag."),
        (("-n", "--nnn"), 10, int, "Number of nearest neighbours."),
        (("--early-stop",), 0.1, float, "Maximum fraction of the nBOW dataset to scan."),
        (("--max-time",), 300, int, "Maximum time to spend scanning in seconds."),
        (("--skipped-stop",), 0.95, float, "Minimum fraction of skipped samples to stop."),
    )
    for flags, default, kind, description in numeric_options:
        cli.add_argument(*flags, default=default, type=kind, help=description)

    supported_languages = [
        "Java", "Python", "Go", "JavaScript", "TypeScript", "Ruby", "Bash", "Php",
    ]
    # The default value for --languages must stay None. Otherwise processing
    # parquet files without the 'lang' column fails with any --languages argument.
    cli.add_argument("-l", "--languages", nargs="+", choices=supported_languages,
                     default=None, help="The programming languages to analyse.")
    cli.add_argument("--blacklist-languages", action="store_true",
                     help="Exclude the languages in --languages from the analysis "
                          "instead of filtering by default.")
    cli.add_argument("-s", "--spark", default=SparkDefault.MASTER_ADDRESS,
                     help="Spark's master address.")
    cli.add_argument("--bblfsh", default=EngineDefault.BBLFSH,
                     help="Babelfish server's address.")
    cli.add_argument("--engine", default=EngineDefault.VERSION,
                     help="source{d} jgit-spark-connector version.")

    options = cli.parse_args()
    setup_logging(options.log_level)
    backend = create_backend()

    # Replace every specified model source with the loaded model object.
    model_classes = (("id2vec", Id2Vec), ("df", DocumentFrequencies), ("bow", BOW))
    for attribute, model_class in model_classes:
        source = getattr(options, attribute)
        if source is not None:
            setattr(options, attribute, model_class().load(source=source, backend=backend))

    wmd_options = {
        "vocabulary_min": options.vocabulary_min,
        "vocabulary_max": options.vocabulary_max,
    }
    engine_options = {
        "spark": options.spark,
        "bblfsh": options.bblfsh,
        "engine": options.engine,
    }
    finder = SimilarRepositories(
        id2vec=options.id2vec,
        df=options.df,
        nbow=options.bow,
        prune_df_threshold=options.prune_df,
        wmd_cache_centroids=False,  # useless for a single query
        wmd_kwargs=wmd_options,
        languages=(options.languages, options.blacklist_languages),
        engine_kwargs=engine_options,
    )
    found = finder.query(
        options.input,
        k=options.nnn,
        early_stop=options.early_stop,
        max_time=options.max_time,
        skipped_stop=options.skipped_stop,
    )
    for index, rate in found:
        print("%48s\t%.2f" % (index, rate))
Пример #12
0
def main():
    """
    Creates all the argument parsers and invokes the function from set_defaults().

    The "init", "dump", "publish", "list" and "delete" commands are registered.
    If none of them is chosen, the usage is printed.

    :return: The result of the function from set_defaults(), or None when only
             the usage was printed.
    """
    root = argparse.ArgumentParser()
    root.add_argument("--log-level", default="INFO", choices=logging._nameToLevel,
                      help="Logging verbosity.")
    commands = root.add_subparsers(help="Commands", dest="command")

    def beside_module(relative_path):
        # Joins the directory of this module with the given relative path.
        return os.path.join(os.path.dirname(__file__), relative_path)

    def with_backend(sub):
        # Options which select and configure the backend.
        sub.add_argument("--backend", default=None, help="Backend to use.")
        sub.add_argument("--args", default=None, help="Backend's arguments.")

    def with_index(sub):
        # Options which describe the Git repository with the index.
        sub.add_argument("--username", default="",
                         help="Username for the Git repository with the index.")
        sub.add_argument("--password", default="",
                         help="Password for the Git repository with the index")
        sub.add_argument("--index-repo", default=None,
                         help="Url of the remote Git repository.")
        sub.add_argument("--cache", default=None,
                         help="Path to the folder where the Git repository will be cached.")

    def with_templates(sub):
        # Options which point to the jinja2 templates used in the index.
        sub.add_argument("--template-model",
                         default=beside_module("templates/template_model.md.jinja2"),
                         help="Path to the jinja2 template used in the index for the model.")
        sub.add_argument("--template-readme",
                         default=beside_module("templates/template_readme.md.jinja2"),
                         help="Path to the jinja2 template used in the index for the readme.")

    # --- init ---------------------------------------------------------------
    init_cmd = commands.add_parser("init", help="Initialize the registry.")
    init_cmd.set_defaults(handler=initialize_registry)
    init_cmd.add_argument("-f", "--force", action="store_true",
                          help="Destructively initialize the registry.")
    for extend in (with_index, with_backend):
        extend(init_cmd)

    # --- dump ---------------------------------------------------------------
    dump_cmd = commands.add_parser(
        "dump", help="Print a brief information about the model to stdout.")
    dump_cmd.set_defaults(handler=dump_model)
    dump_cmd.add_argument("input", help="Path to the model file, URL or UUID.")
    for extend in (with_index, with_backend):
        extend(dump_cmd)

    # --- publish ------------------------------------------------------------
    publish_cmd = commands.add_parser(
        "publish", help="Upload the model and update the registry.")
    publish_cmd.set_defaults(handler=publish_model)
    publish_cmd.add_argument("model", help="The path to the model to publish.")
    publish_cmd.add_argument(
        "--meta", default=beside_module("templates/template_meta.json"),
        help="Path to the JSON file which contains the additional metadata of the model.")
    publish_cmd.add_argument("-d", "--update-default", action="store_true",
                             help="Set this model as the default one.")
    publish_cmd.add_argument("-f", "--force", action="store_true",
                             help="Overwrite existing models.")
    for extend in (with_index, with_backend, with_templates):
        extend(publish_cmd)

    # --- list ---------------------------------------------------------------
    list_cmd = commands.add_parser("list", help="Lists all the models in the registry.")
    list_cmd.set_defaults(handler=list_models)
    with_index(list_cmd)

    # --- delete -------------------------------------------------------------
    delete_cmd = commands.add_parser("delete", help="Delete a model.")
    delete_cmd.set_defaults(handler=delete_model)
    delete_cmd.add_argument("input", help="UUID of the model to be deleted.")
    for extend in (with_index, with_backend, with_templates):
        extend(delete_cmd)

    options = root.parse_args()
    # Convert the textual level name to the numeric logging level.
    options.log_level = logging._nameToLevel[options.log_level]
    setup_logging(options.log_level)
    if hasattr(options, "handler"):
        return options.handler(options)
    # No command was chosen, so there is no handler to call.
    root.print_usage()
    return None
Пример #13
0
def main():
    """
    Creates all the argument parsers and invokes the function from set_defaults().

    The "dump", "publish", "list" and "init" commands are registered. If none of
    them is chosen, the usage is printed.

    :return: The result of the function from set_defaults(), or None when only
             the usage was printed.
    """
    top = argparse.ArgumentParser()
    top.add_argument("--log-level", default="INFO", choices=logging._nameToLevel,
                     help="Logging verbosity.")
    commands = top.add_subparsers(help="Commands", dest="command")

    def with_backend(sub):
        # Options which select and configure the backend.
        sub.add_argument("--backend", default=None, help="Backend to use.")
        sub.add_argument("--args", default=None, help="Backend's arguments.")

    dump_cmd = commands.add_parser(
        "dump", help="Print a brief information about the model to stdout.")
    dump_cmd.set_defaults(handler=dump_model)
    dump_cmd.add_argument("input", help="Path to the model file, URL or UUID.")
    with_backend(dump_cmd)

    publish_cmd = commands.add_parser(
        "publish", help="Upload the model to the cloud and update the registry.")
    publish_cmd.set_defaults(handler=publish_model)
    publish_cmd.add_argument("model", help="The path to the model to publish.")
    with_backend(publish_cmd)
    publish_cmd.add_argument("-d", "--update-default", action="store_true",
                             help="Set this model as the default one.")
    publish_cmd.add_argument("--force", action="store_true",
                             help="Overwrite existing models.")

    list_cmd = commands.add_parser("list", help="Lists all the models in the registry.")
    list_cmd.set_defaults(handler=list_models)
    with_backend(list_cmd)

    init_cmd = commands.add_parser("init", help="Initialize the registry.")
    init_cmd.set_defaults(handler=initialize_registry)
    with_backend(init_cmd)

    options = top.parse_args()
    # Translate the level name into its numeric value.
    options.log_level = logging._nameToLevel[options.log_level]
    setup_logging(options.log_level)

    def show_usage(_):
        top.print_usage()

    # Fall back to printing the usage when no command registered a handler.
    command = getattr(options, "handler", show_usage)
    return command(options)