Example #1
import json


def bash_snippet(model_name, source="kipoi"):
    output_dir = 'example'
    try:
        kw = json.dumps(
            get_example_kwargs(model_name, source, output_dir=output_dir))
        group_name = get_group_name(model_name, source)
        env_name = conda_env_name(group_name, group_name, source)
    except Exception:
        kw = "Error"
        env_name = "Error"
        group_name = "Error"
    ctx = {
        "model_name": model_name,
        "model_name_no_slash": model_name.replace("/", "|"),
        "group_name": group_name,
        "env_name": env_name,
        "source": source,
        "output_dir": output_dir,
        "example_kwargs": kw
    }
    test_snippet = "Test the model", "kipoi test {model_name} --source={source}".format(
        **ctx)
    predict_snippet = "Make a prediction", """kipoi get-example {model_name} -o {output_dir}
kipoi predict {model_name} \\
  --dataloader_args='{example_kwargs}' \\
  -o '/tmp/{model_name_no_slash}.example_pred.tsv'
# check the results
head '/tmp/{model_name_no_slash}.example_pred.tsv'
""".format(**ctx)
    if model_name == "Basenji":
        test_snippet = "Test the model", "kipoi test {model_name} --batch_size=2 --source={source}".format(
            **ctx)
        predict_snippet = "Make a prediction", """kipoi get-example {model_name} -o {output_dir}
kipoi predict {model_name} \\
  --dataloader_args='{example_kwargs}' \\
  --batch_size=2 -o '/tmp/{model_name_no_slash}.example_pred.tsv'
# check the results
head '/tmp/{model_name_no_slash}.example_pred.tsv'
""".format(**ctx)
    return [
        ("Create a new conda environment with all dependencies installed",
         "kipoi env create {group_name}\nsource activate {env_name}".format(
             **ctx)),
        test_snippet,
        predict_snippet,
    ]
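
A minimal usage sketch (assuming the kipoi helpers above resolve), showing how the returned (title, command) pairs might be rendered; the model name is illustrative:

if __name__ == "__main__":
    # Print each generated snippet as a titled shell block.
    for title, command in bash_snippet("DeepSEA/predict"):
        print("# " + title)
        print(command)
        print()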
Example #2
def py_snippet(model_name, source="kipoi"):
    """Generate the python code snippet
    """
    try:
        kw = get_example_kwargs(model_name, source)
        group_name = get_group_name(model_name, source)
        env_name = conda_env_name(group_name, group_name, source)
    except Exception:
        kw = "Error"
        group_name = "Error"
        env_name = "Error"
    ctx = {
        "model_name": model_name,
        "group_name": group_name,
        "env_name": env_name,
        "example_kwargs": kw,
        "batch_size": get_batch_size(model_name, source)
    }
    return [
        ("Create a new conda environment with all dependencies installed",
         "kipoi env create {group_name}\nsource activate {env_name}".format(
             **ctx)),
        ("Get the model", """import kipoi
model = kipoi.get_model('{model_name}')""".format(**ctx)),
        ("Make a prediction for example files",
         """pred = model.pipeline.predict_example(batch_size={batch_size})""".
         format(**ctx)),
        ("Use dataloader and model separately",
         """# Download example dataloader kwargs
dl_kwargs = model.default_dataloader.download_example('example')
# Get the dataloader and instantiate it
dl = model.default_dataloader(**dl_kwargs)
# get a batch iterator
batch_iterator = dl.batch_iter(batch_size={batch_size})
for batch in batch_iterator:
    # predict for a batch
    batch_pred = model.predict_on_batch(batch['inputs'])""".format(**ctx)),
        ("Make predictions for custom files directly",
         """pred = model.pipeline.predict(dl_kwargs, batch_size={batch_size})"""
         .format(**ctx)),
    ]
Example #3
import logging
import subprocess


def test_model(model_name, caplog):
    """kipoi test ...
    """
    caplog.set_level(logging.INFO)

    source_name = "kipoi"
    assert source_name == "kipoi"

    env_name = conda_env_name(model_name, model_name, source_name)
    env_name = "test-" + env_name  # prepend "test-"

    # if environment already exists, remove it
    if env_exists(env_name):
        print("Removing the environment: {0}".format(env_name))
        remove_env(env_name)

    # create the model test environment
    args = ["kipoi", "env", "create",
            "--source", source_name,
            "--env", env_name,
            model_name]
    returncode = subprocess.call(args=args)
    assert returncode == 0

    if model_name == "basenji":
        batch_size = str(2)
    else:
        batch_size = str(4)

    # run the tests in the environment
    args = [get_kipoi_bin(env_name), "test",
            "--batch_size", batch_size,
            "--source", source_name,
            model_name]
    returncode = subprocess.call(args=args)
    assert returncode == 0

    for record in caplog.records:
        # there shouldn't be any warnings or errors
        assert record.levelname not in ['WARN', 'WARNING', 'ERROR', 'CRITICAL']
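
In a test suite this function would typically be driven by pytest parametrization; a hedged sketch, where the model-listing call is an assumption about the kipoi API and the body is the function above:

import kipoi
import pytest

# kipoi.list_models() is assumed to return a pandas DataFrame with a
# "model" column; an explicit list of model names would work as well.
ALL_MODELS = kipoi.list_models().model.tolist()


@pytest.mark.parametrize("model_name", ALL_MODELS)
def test_model(model_name, caplog):
    ...  # body as defined above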
Example #4
def bash_snippet(model_name, source="kipoi"):
    try:
        kw = get_example_kwargs(model_name)
        env_name = conda_env_name(model_name, model_name, source)
    except Exception:
        kw = "Error"
        env_name = "Error"
    ctx = {"model_name": model_name,
           "model_name_no_slash": model_name.replace("/", "|"),
           "env_name": env_name,
           "source": source,
           "example_kwargs": kw}
    return [
        ("Create a new conda environment with all dependencies installed", "kipoi env create {model_name}\nsource activate {env_name}".format(**ctx)),
        ("Install model dependencies into current environment", "kipoi env install {model_name}".format(**ctx)),
        ("Test the model", "kipoi test {model_name} --source={source}".format(**ctx)),
        ("Make a prediction", """cd ~/.kipoi/models/{model_name}
kipoi predict {model_name} \\
  --dataloader_args='{example_kwargs}' \\
  -o '/tmp/{model_name_no_slash}.example_pred.tsv'
# check the results
head '/tmp/{model_name_no_slash}.example_pred.tsv'
""".format(**ctx)),
    ]
Example #5
import argparse
import logging
import os
import re
import sys

import numpy as np

import kipoi
import kipoi.cli.main
import kipoi.specs

logger = logging.getLogger(__name__)


def cli_test_source(command, raw_args):
    """Run tests on models in a model source
    """
    assert command == "test-source"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Test models in model source')
    parser.add_argument('source', default="kipoi", help='Which source to test')
    parser.add_argument('--git-range',
                        nargs='+',
                        help='''Git range (e.g. commits or something like
                        "master HEAD" to check commits in HEAD vs master, or just "HEAD" to
                        include uncommitted changes). All models modified within this range will
                        be tested.''')
    parser.add_argument('-n',
                        '--dry_run',
                        action='store_true',
                        help="Don't run model testing")
    parser.add_argument('-b',
                        '--batch_size',
                        default=4,
                        type=int,
                        help='Batch size')
    parser.add_argument('-x',
                        '--exitfirst',
                        action='store_true',
                        help='exit instantly on first error or failed test.')
    parser.add_argument(
        '-k',
        default=None,
        help='only run tests which match the given substring expression')
    parser.add_argument('-c',
                        '--clean_env',
                        action='store_true',
                        help='clean the environment after running.')
    parser.add_argument(
        "--vep",
        action=kipoi.cli.main.DeprecateAction,
        help=
        "This argument is deprecated. Please use https://github.com/kipoi/kipoi-veff2 directly"
    )
    parser.add_argument('--common_env',
                        action='store_true',
                        help='Test models in common environments.')
    parser.add_argument('--all',
                        action='store_true',
                        help="Test all models in the source")
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help=
        "Increase output verbosity. Show conda stdout during env installation."
    )
    parser.add_argument('--shard_id', type=int, default=-1, help="Shard id")
    parser.add_argument('--num_of_shards',
                        type=int,
                        default=-1,
                        help="Number of shards")
    parser.add_argument('--singularity',
                        action='store_true',
                        help='Test models within their singularity containers')

    args = parser.parse_args(raw_args)
    if args.singularity and args.source != "kipoi":
        raise IOError(
            "Singularity containers are available for kipoi models only")
    if args.singularity and args.common_env:
        raise IOError("Please use only one of --singularity and --common_env")

    # --------------------------------------------
    source = kipoi.get_source(args.source)
    all_models = all_models_to_test(source)
    if args.k is not None:
        all_models = [x for x in all_models if re.match(args.k, x)]

    if len(all_models) == 0:
        logger.info("No models found in the source")
        sys.exit(1)

    if args.all:
        test_models = all_models
        logger.info('Testing all models:\n- {0}'.format(
            '\n- '.join(test_models)))
    else:
        test_models = restrict_models_to_test(all_models, source,
                                              args.git_range)
        if len(test_models) == 0:
            logger.info("No model modified according to git, exiting.")
            sys.exit(0)
        logger.info('{0}/{1} models modified according to git:\n- {2}'.format(
            len(test_models), len(all_models), '\n- '.join(test_models)))
    # Sort the models alphabetically
    test_models = sorted(test_models)
    if args.num_of_shards > 0 and args.shard_id >= 0:
        if args.shard_id >= args.num_of_shards:
            logger.info(
                "Shard id is invalid. It should be a value between 0 and {0}.".
                format(args.num_of_shards - 1))
            sys.exit(1)
        else:
            shards = np.array_split(test_models, args.num_of_shards)
            test_models = list(shards[args.shard_id])

    logger.info(test_models)
    # Parse the repo config
    cfg_path = get_file_path(source.local_path,
                             "config",
                             extensions=[".yml", ".yaml"],
                             raise_err=False)
    if cfg_path is not None:
        cfg = kipoi.specs.SourceConfig.load(cfg_path, append_path=False)
        logger.info("Found config {0}:\n{1}".format(cfg_path, cfg))
    else:
        cfg = None

    if args.dry_run:
        logger.info(
            "-n/--dry_run enabled. Skipping model testing and exiting.")
        sys.exit(0)

    # TODO - make sure the models are always tested in the same order?
    #        - make sure the keras config doesn't get cluttered

    # Test common environments
    if args.common_env:
        logger.info("Installing common environmnets")
        import yaml
        models_yaml_path = os.path.join(source.local_path, SPECIAL_ENV_PREFIX,
                                        "models.yaml")
        if not os.path.exists(models_yaml_path):
            logger.error(
                "{} doesn't exist when installing the common environment".
                format(models_yaml_path))
            sys.exit(1)
        with open(models_yaml_path, "r", encoding="utf-8") as f:
            model_envs = yaml.safe_load(f)

        test_envs = {
            get_common_env(m, model_envs)
            for m in test_models if get_common_env(m, model_envs) is not None
        }

        if len(test_envs) == 0:
            logger.info("No common environments to test")
            sys.exit(0)

        logger.info(
            "Installing environments covering the following models:\n{}".
            format(yaml.dump(model_envs)))
        for env in test_envs:
            if env_exists(env):
                logger.info(
                    "Common environment already exists: {}. Skipping the installation"
                    .format(env))
            else:
                logger.info("Installing environment: {}".format(env))
                create_model_env(os.path.join(SPECIAL_ENV_PREFIX, env),
                                 args.source, env)

    logger.info("Running {0} tests..".format(len(test_models)))
    failed_models = []
    for i, m in enumerate(test_models):
        print('-' * 20)
        print("{0}/{1} - model: {2}".format(i + 1, len(test_models), m))
        print('-' * 20)
        try:
            if not args.common_env and not args.singularity:
                # Prepend "test-" to the standard kipoi env name
                env_name = conda_env_name(m, source=args.source)
                env_name = "test-" + env_name
                # Test
                test_model(m,
                           args.source,
                           env_name,
                           get_batch_size(cfg, m, args.batch_size),
                           create_env=True,
                           verbose=args.verbose)
            elif args.singularity and not args.common_env:
                print("Testing within singularity container....")
                test_model_singularity(m,
                                       args.source,
                                       get_batch_size(cfg, m, args.batch_size),
                                       verbose=args.verbose)
            elif args.common_env and not args.singularity:
                # figure out the common environment name
                env_name = get_common_env(m, model_envs)
                if env_name is None:
                    # skip if none was found
                    logger.info(
                        "Common environment not found for {}".format(m))
                    continue
                # ---------------------------
                # Test
                print("test_model...")
                test_model(m,
                           args.source,
                           env_name,
                           get_batch_size(cfg, m, args.batch_size),
                           create_env=False,
                           verbose=args.verbose)
            else:
                raise IOError(
                    "Please choose at most one of --common_env and --singularity"
                )
        except Exception as e:
            logger.error("Model {0} failed: {1}".format(m, e))
            failed_models += [m]
            if args.exitfirst:
                if args.clean_env and not args.common_env:
                    rm_env(env_name)
                sys.exit(1)
        finally:
            if args.clean_env and not args.common_env:
                rm_env(env_name)
    print('-' * 40)
    if failed_models:
        logger.error("{0}/{1} tests failed for models:\n- {2}".format(
            len(failed_models), len(test_models), "\n- ".join(failed_models)))
        sys.exit(1)

    logger.info('All tests ({0}) passed'.format(len(test_models)))
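
The shard selection above relies on numpy.array_split, which yields near-equal contiguous chunks even when the model count is not divisible by the shard count; a small standalone sketch of that behavior:

import numpy as np

models = sorted(["A", "B", "C", "D", "E"])
# With 5 models and 2 shards, shard 0 gets 3 models and shard 1 gets 2.
shards = [list(s) for s in np.array_split(models, 2)]
assert shards == [["A", "B", "C"], ["D", "E"]]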
Example #6
import argparse
import logging
import re
import sys

import kipoi
import kipoi.specs

logger = logging.getLogger(__name__)


def cli_test_source(command, raw_args):
    """Run tests on models in a model source
    """
    assert command == "test-source"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Test models in model source')
    parser.add_argument('source', default="kipoi",
                        help='Which source to test')
    parser.add_argument('--git-range', nargs='+',
                        help='''Git range (e.g. commits or something like
                        "master HEAD" to check commits in HEAD vs master, or just "HEAD" to
                        include uncommitted changes). All models modified within this range will
                        be tested.''')
    parser.add_argument('-n', '--dry_run', action='store_true',
                        help="Don't run model testing")
    parser.add_argument('-b', '--batch_size', default=4, type=int,
                        help='Batch size')
    parser.add_argument('-x', '--exitfirst', action='store_true',
                        help='exit instantly on first error or failed test.')
    parser.add_argument('-k', default=None,
                        help='only run tests which match the given substring expression')
    parser.add_argument('-c', '--clean_env', action='store_true',
                        help='clean the environment after running.')
    parser.add_argument('--vep', action='store_true',
                        help='Install the vep dependency.')
    parser.add_argument('--all', action='store_true',
                        help="Test all models in the source")

    args = parser.parse_args(raw_args)
    # --------------------------------------------
    source = kipoi.get_source(args.source)
    all_models = all_models_to_test(source)
    if args.k is not None:
        all_models = [x for x in all_models if re.match(args.k, x)]

    if len(all_models) == 0:
        logger.info("No models found in the source")
        sys.exit(1)
    if args.all:
        test_models = all_models
        logger.info('Testing all models:\n- {0}'.
                    format('\n- '.join(test_models)))
    else:
        test_models = restrict_models_to_test(all_models,
                                              source,
                                              args.git_range)
        if len(test_models) == 0:
            logger.info("No model modified according to git, exiting.")
            sys.exit(0)
        logger.info('{0}/{1} models modified according to git:\n- {2}'.
                    format(len(test_models), len(all_models),
                           '\n- '.join(test_models)))
    # Sort the models alphabetically
    test_models = sorted(test_models)

    # Parse the repo config
    cfg_path = get_file_path(source.local_path, "config",
                             extensions=[".yml", ".yaml"],
                             raise_err=False)
    if cfg_path is not None:
        cfg = kipoi.specs.SourceConfig.load(cfg_path, append_path=False)
        logger.info("Found config {0}:\n{1}".format(cfg_path, cfg))
    else:
        cfg = None

    if args.dry_run:
        logger.info("-n/--dry_run enabled. Skipping model testing and exiting.")
        sys.exit(0)

    # TODO - make sure the models are always tested in the same order?
    #        - make sure the keras config doesn't get cluttered

    logger.info("Running {0} tests..".format(len(test_models)))
    failed_models = []
    for i, m in enumerate(test_models):
        print('-' * 20)
        print("{0}/{1} - model: {2}".format(i + 1, len(test_models), m))
        print('-' * 20)
        try:
            env_name = conda_env_name(m, source=args.source)
            env_name = "test-" + env_name  # prepend "test-"
            test_model(m, args.source, env_name,
                       get_batch_size(cfg, m, args.batch_size), args.vep)
        except Exception as e:
            logger.error("Model {0} failed: {1}".format(m, e))
            failed_models += [m]
            if args.exitfirst:
                if args.clean_env:
                    rm_env(env_name)
                sys.exit(1)
        finally:
            if args.clean_env:
                rm_env(env_name)
    print('-' * 40)
    if failed_models:
        logger.error("{0}/{1} tests failed for models:\n- {2}".
                     format(len(failed_models),
                            len(test_models),
                            "\n- ".join(failed_models)))
        sys.exit(1)

    logger.info('All tests ({0}) passed'.format(len(test_models)))
Example #7
import argparse
import logging
import os
import re
import sys

import kipoi
import kipoi.specs

logger = logging.getLogger(__name__)


def cli_test_source(command, raw_args):
    """Run tests on models in a model source
    """
    assert command == "test-source"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Test models in model source')
    parser.add_argument('source', default="kipoi", help='Which source to test')
    parser.add_argument('--git-range',
                        nargs='+',
                        help='''Git range (e.g. commits or something like
                        "master HEAD" to check commits in HEAD vs master, or just "HEAD" to
                        include uncommitted changes). All models modified within this range will
                        be tested.''')
    parser.add_argument('-n',
                        '--dry_run',
                        action='store_true',
                        help="Don't run model testing")
    parser.add_argument('-b',
                        '--batch_size',
                        default=4,
                        type=int,
                        help='Batch size')
    parser.add_argument('-x',
                        '--exitfirst',
                        action='store_true',
                        help='exit instantly on first error or failed test.')
    parser.add_argument(
        '-k',
        default=None,
        help='only run tests which match the given substring expression')
    parser.add_argument('-c',
                        '--clean_env',
                        action='store_true',
                        help='clean the environment after running.')
    parser.add_argument('--vep',
                        action='store_true',
                        help='Install the vep dependency.')
    parser.add_argument('--common_env',
                        action='store_true',
                        help='Test models in common environments.')
    parser.add_argument('--all',
                        action='store_true',
                        help="Test all models in the source")

    args = parser.parse_args(raw_args)
    # --------------------------------------------
    source = kipoi.get_source(args.source)
    all_models = all_models_to_test(source)
    if args.k is not None:
        all_models = [x for x in all_models if re.match(args.k, x)]

    if len(all_models) == 0:
        logger.info("No models found in the source")
        sys.exit(1)
    if args.all:
        test_models = all_models
        logger.info('Testing all models:\n- {0}'.format(
            '\n- '.join(test_models)))
    else:
        test_models = restrict_models_to_test(all_models, source,
                                              args.git_range)
        if len(test_models) == 0:
            logger.info("No model modified according to git, exiting.")
            sys.exit(0)
        logger.info('{0}/{1} models modified according to git:\n- {2}'.format(
            len(test_models), len(all_models), '\n- '.join(test_models)))
    # Sort the models alphabetically
    test_models = sorted(test_models)

    # Parse the repo config
    cfg_path = get_file_path(source.local_path,
                             "config",
                             extensions=[".yml", ".yaml"],
                             raise_err=False)
    if cfg_path is not None:
        cfg = kipoi.specs.SourceConfig.load(cfg_path, append_path=False)
        logger.info("Found config {0}:\n{1}".format(cfg_path, cfg))
    else:
        cfg = None

    if args.dry_run:
        logger.info(
            "-n/--dry_run enabled. Skipping model testing and exiting.")
        sys.exit(0)

    # TODO - make sure the models are always tested in the same order?
    #        - make sure the keras config doesn't get cluttered

    # Test common environments
    if args.common_env:
        logger.info("Installing common environmnets")
        import yaml
        models_yaml_path = os.path.join(source.local_path, SPECIAL_ENV_PREFIX,
                                        "models.yaml")
        if not os.path.exists(models_yaml_path):
            logger.error(
                "{} doesn't exist when installing the common environment".
                format(models_yaml_path))
            sys.exit(1)
        with open(models_yaml_path, "r", encoding="utf-8") as f:
            model_envs = yaml.safe_load(f)

        test_envs = {
            get_common_env(m, model_envs)
            for m in test_models if get_common_env(m, model_envs) is not None
        }

        if len(test_envs) == 0:
            logger.info("No common environments to test")
            sys.exit(0)

        logger.info(
            "Installing environments covering the following models:\n{}".
            format(yaml.dump(model_envs)))
        for env in test_envs:
            if env_exists(env):
                logger.info(
                    "Common environment already exists: {}. Skipping the installation"
                    .format(env))
            else:
                logger.info("Installing environment: {}".format(env))
                create_model_env(os.path.join(SPECIAL_ENV_PREFIX, env),
                                 args.source,
                                 env,
                                 vep=args.vep)

    logger.info("Running {0} tests..".format(len(test_models)))
    failed_models = []
    for i, m in enumerate(test_models):
        print('-' * 20)
        print("{0}/{1} - model: {2}".format(i + 1, len(test_models), m))
        print('-' * 20)
        try:
            if not args.common_env:
                # Prepend "test-" to the standard kipoi env name
                env_name = conda_env_name(m, source=args.source)
                env_name = "test-" + env_name
                # Test
                test_model(m,
                           args.source,
                           env_name,
                           get_batch_size(cfg, m, args.batch_size),
                           args.vep,
                           create_env=True)
            else:
                # figure out the common environment name
                env_name = get_common_env(m, model_envs)
                if env_name is None:
                    # skip if none was found
                    logger.info(
                        "Common environment not found for {}".format(m))
                    continue
                # ---------------------------
                # Test
                print("test_model...")
                test_model(m,
                           args.source,
                           env_name,
                           get_batch_size(cfg, m, args.batch_size),
                           args.vep,
                           create_env=False)
        except Exception as e:
            logger.error("Model {0} failed: {1}".format(m, e))
            failed_models += [m]
            if args.exitfirst:
                if args.clean_env and not args.common_env:
                    rm_env(env_name)
                sys.exit(1)
        finally:
            if args.clean_env and not args.common_env:
                rm_env(env_name)
    print('-' * 40)
    if failed_models:
        logger.error("{0}/{1} tests failed for models:\n- {2}".format(
            len(failed_models), len(test_models), "\n- ".join(failed_models)))
        sys.exit(1)

    logger.info('All tests ({0}) passed'.format(len(test_models)))
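
A hedged example of calling this entry point directly with argv-style arguments, mirroring what the kipoi CLI dispatcher would pass through:

# Dry-run over all models in the "kipoi" source; note that --dry_run
# (like the error paths) terminates the process via sys.exit().
cli_test_source("test-source", ["kipoi", "--all", "--dry_run"])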