示例#1
0
def cmd_build(args, remainder):
    """Run the bazel test build for the requested variant, then archive pkg.tar.gz."""
    common_args = [
        '--config={}'.format(args.variant),
        '--define=version={}'.format(args.version),
        '--explain={}'.format(os.path.abspath('explain.log')),
        '--verbose_failures',
        '--verbose_explanations',
    ]
    env = None
    if platform.system() == 'Windows':
        # Windows builds need git symlinks enabled and a conda environment.
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env = os.environ.copy()
        env.update(cenv.env())

    util.check_call(['bazelisk', 'test', '...'] + common_args, env=env)
    archive_dir = os.path.join(args.root, args.pipeline, args.build_id,
                               'build', args.variant)
    os.makedirs(archive_dir, exist_ok=True)
    shutil.copy(os.path.join('bazel-bin', 'pkg.tar.gz'), archive_dir)
示例#2
0
文件: upload.py 项目: garza/v8monkey
def DoSCPFile(file, remote_path, user, host, port=None, ssh_key=None):
    """Upload *file* to user@host:remote_path with scp, using the optional
    port and ssh_key when given."""
    cmdline = ["scp"]
    AppendOptionalArgsToSSHCommandline(cmdline, port, ssh_key)
    target = "%s@%s:%s" % (user, host, remote_path)
    cmdline.append(WindowsPathToMsysPath(file))
    cmdline.append(target)
    check_call(cmdline)
示例#3
0
def run_iss(rc, iss_command):
    """Run the ISS command, then annotate/count reads for each mate file and
    write a summary before cleaning up the run context."""
    check_call(iss_command)
    counters = defaultdict(int)
    suffixes = ["_1\n", "_2\n"]
    for tmp_fastq, output_fastq, suffix in zip(rc.tmp_files, rc.output_files,
                                               suffixes):
        annotate_and_count_reads(tmp_fastq, output_fastq, suffix, counters)
    output_summary_counters(rc.summary_file, counters)
    rc.cleanup()
示例#4
0
def DoSCPFile(file, remote_path, user, host, port=None, ssh_key=None):
    """Copy *file* to ``user@host:remote_path`` via scp, honoring the
    optional *port* and *ssh_key* settings."""
    cmdline = ["scp"]
    AppendOptionalArgsToSSHCommandline(cmdline, port, ssh_key)
    cmdline += [WindowsPathToMsysPath(file),
                "%s@%s:%s" % (user, host, remote_path)]
    check_call(cmdline)
示例#5
0
文件: console.py 项目: nveeser/dino
def activate(generated_root=None, logger=None):
    """Install the generated conserver config and restart the conserver daemon.

    Args:
        generated_root: Directory containing the generated 'conserver.cf'.
            Despite the default, None would make os.path.join fail — callers
            are expected to pass a real path.
        logger: Optional logger; a 'console' logger is created when omitted.
    """
    if logger is None:
        logger = util.setup_logging('console')
    conf_file = os.path.join(generated_root, 'conserver.cf')
    shutil.copy(conf_file, '/etc/conserver.cf')
    # The original wrapped this call in `except (RuntimeError, SystemExit):
    # raise`, which is a no-op handler; any failure propagates either way.
    util.check_call(['/etc/init.d/conserver', 'restart'])
示例#6
0
def main() -> None:
    """Run the AF4 titration benchmarks for each configured sample.

    Caches references and FASTQs locally, runs AF4 per sample (skipping
    samples whose filtered.fa already exists), then removes *rerun* scratch
    files from the cache.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s:%(levelname)s: %(message)s")
    p = argparse.ArgumentParser()
    p.add_argument('--cache_dir',
                   default=util.DEFAULT_CACHE_DIR,
                   help='Benchmark cache dir')
    p.add_argument('--result_dir',
                   default=util.DEFAULT_RESULT_DIR,
                   help='Benchmark result dir')
    args = p.parse_args()

    util.s3_cache_files([
        util.REFERENCE_DIR + '/gencode.v26.whole_genes.fa',
        util.REFERENCE_DIR + '/all_pair_art_lod_gpair_merged.txt'
    ], args.cache_dir)
    for sample in util.TITRATION_SAMPLES:
        logging.info('Start benchmark %s', sample.name)
        result_dir = args.result_dir + '/' + sample.name
        try:
            os.makedirs(result_dir, 0o755)
        except OSError:  # narrowed from a bare except; dir may already exist
            logging.error("mkdir %s failed", result_dir)
        if os.path.exists(result_dir + "/filtered.fa"):
            logging.info("Skip %s", result_dir)
            continue
        util.s3_cache_files(util.expand_fastq_files(sample.paths),
                            args.cache_dir)
        cached_r1 = ",".join([
            args.cache_dir + '/' + os.path.basename(fq.r1)
            for fq in sample.paths
        ])
        cached_r2 = ",".join([
            args.cache_dir + '/' + os.path.basename(fq.r2)
            for fq in sample.paths
        ])
        cached_ref = args.cache_dir + '/gencode.v26.whole_genes.fa'
        cached_cosmic_fusion = args.cache_dir + '/all_pair_art_lod_gpair_merged.txt'

        # f-prefixes dropped from constant flags (no placeholders).
        af4_args = [
            str(util.af4_path()), f'-log_dir={result_dir}', '-pprof=:12345',
            '-mutex-profile-rate=1000', '-block-profile-rate=1000',
            f'-r1={cached_r1}', f'-r2={cached_r2}',
            f'-fasta-output={result_dir}/all.fa',
            f'-filtered-output={result_dir}/filtered.fa',
            f'-transcript={cached_ref}', '-max-genes-per-kmer=2',
            '-max-proximity-distance=1000', '-max-proximity-genes=5',
            '-unstranded-prep', f'-cosmic-fusion={cached_cosmic_fusion}'
        ]
        util.check_call(af4_args)
        # BUG FIX: was 'Finished benchmark %d: %s' with a single argument,
        # which makes the logging module report a formatting error.
        logging.info('Finished benchmark %s', sample.name)
        logging.info("Runtime stats: %s", util.run_stats(Path(result_dir)))
        for path in glob.glob(f'{args.cache_dir}/*rerun*'):
            try:
                os.remove(path)
            except OSError:  # narrowed from a bare except
                logging.error("failed to remove %s", path)
示例#7
0
def cmd_build(args, remainder):
    """Build/test the selected variant via bazelisk, upload build artifacts
    and wheels to Buildkite, and archive the package tarball."""
    import yaml
    with open('ci/plan.yml') as file_:
        plan = yaml.safe_load(file_)

    variant = plan['VARIANTS'][args.variant]
    env = os.environ.copy()
    env.update({key: str(value) for key, value in variant['env'].items()})

    util.printf('--- :snake: pre-build steps... ')
    util.printf('delete any old whl files...')
    wheel_dirs = [
        wheel_path(name).resolve()
        for name in ('plaidml', 'plaidml/keras', 'plaidbench')
    ]
    wheel_clean(wheel_dirs)

    explain_log = 'explain.log'
    profile_json = 'profile.json.gz'
    bazel_config = variant.get('bazel_config', args.variant)

    common_args = [
        '--config={}'.format(bazel_config),
        '--define=version={}'.format(args.version),
        '--experimental_generate_json_trace_profile',
        '--experimental_json_trace_compression',
        '--experimental_profile_cpu_usage',
        '--explain={}'.format(explain_log),
        '--profile={}'.format(profile_json),
        '--verbose_failures',
        '--verbose_explanations',
    ]

    util.printf('--- :bazel: Running Build...')
    if platform.system() == 'Windows':
        # Windows builds need git symlinks and a conda environment.
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env.update(cenv.env())
    util.check_call(['bazelisk', 'test', '...'] + common_args, env=env)

    util.printf('--- :buildkite: Uploading artifacts...')
    buildkite_upload(explain_log)
    buildkite_upload(profile_json)
    for wheel_dir in wheel_dirs:
        buildkite_upload('*.whl', cwd=wheel_dir)

    archive_dir = os.path.join(args.root, args.pipeline, args.build_id,
                               'build', args.variant)
    os.makedirs(archive_dir, exist_ok=True)
    shutil.copy(os.path.join('bazel-bin', 'pkg.tar.gz'), archive_dir)
示例#8
0
def cmd_report(args, remainder):
    """Run the //ci:report tool against this build's archive directory,
    forwarding any extra command-line arguments."""
    archive_dir = os.path.join(args.root, args.pipeline, args.build_id)
    report_cmd = ['bazelisk', 'run', '//ci:report', '--',
                  '--pipeline', args.pipeline, '--annotate', archive_dir]
    report_cmd.extend(remainder)
    util.check_call(report_cmd)
示例#9
0
def cmd_build(args, remainder):
    """Build/test the variant with bazelisk, upload logs and the wheels
    extracted from the package tarball, and archive the tarball."""
    import yaml
    with open('ci/plan.yml') as file_:
        plan = yaml.safe_load(file_)

    variant = plan['VARIANTS'][args.variant]
    env = os.environ.copy()
    env.update({key: str(value) for key, value in variant['env'].items()})

    explain_log = 'explain.log'
    profile_json = 'profile.json.gz'
    bazel_config = variant.get('bazel_config', args.variant)

    common_args = [
        '--config={}'.format(bazel_config),
        '--define=version={}'.format(args.version),
        '--experimental_generate_json_trace_profile',
        '--experimental_json_trace_compression',
        '--experimental_profile_cpu_usage',
        '--explain={}'.format(explain_log),
        '--profile={}'.format(profile_json),
        '--verbose_failures',
        '--verbose_explanations',
    ]

    util.printf('--- :bazel: Running Build...')
    if platform.system() == 'Windows':
        # Windows builds need git symlinks and a conda environment.
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env.update(cenv.env())
    util.check_call(['bazelisk', 'test', '...'] + common_args, env=env)

    util.printf('--- :buildkite: Uploading artifacts...')
    buildkite_upload(explain_log)
    buildkite_upload(profile_json)

    # Pull just the .whl members out of the package tarball, then upload them.
    shutil.rmtree('tmp', ignore_errors=True)
    tarball = os.path.join('bazel-bin', 'pkg.tar.gz')
    with tarfile.open(tarball, "r") as tar:
        wheels = [item for item in tar.getmembers()
                  if item.name.endswith('.whl')]
        tar.extractall('tmp', members=wheels)
    buildkite_upload('*.whl', cwd='tmp')

    archive_dir = os.path.join(args.root, args.pipeline, args.build_id,
                               'build', args.variant)
    os.makedirs(archive_dir, exist_ok=True)
    shutil.copy(tarball, archive_dir)
示例#10
0
def copy_files(src_files: List[str], dest_dir: str) -> None:
    """Copy *src_files* into *dest_dir* via the grail-file tool.

    Raises:
        ValueError: if two source paths share the same basename, which would
            silently overwrite each other in *dest_dir*.
    """
    basenames: Set[str] = set()
    for path in src_files:
        basename = os.path.basename(path)
        if basename in basenames:
            # ValueError (a subclass of Exception) is more precise than the
            # generic Exception raised before; callers catching Exception
            # still work.
            raise ValueError("Duplicate filename: " + path)
        basenames.add(basename)
    logging.info("%s -> %s", src_files, dest_dir)
    util.check_call([str(util.grail_file_path()), "cp", "-v"] + src_files +
                    [dest_dir])
示例#11
0
def cmd_report(args, remainder):
    """Build all wheels into tmp/, then run //ci:report on the build archive."""
    workdir = pathlib.Path('tmp').resolve()
    make_all_wheels(workdir)
    archive_dir = os.path.join(args.root, args.pipeline, args.build_id)
    report_cmd = ['bazelisk', 'run', '//ci:report', '--',
                  '--pipeline', args.pipeline, '--annotate', archive_dir]
    report_cmd += remainder
    # The report tool's stderr is noisy; discard it.
    util.check_call(report_cmd, stderr=subprocess.DEVNULL)
示例#12
0
def cmd_report(args, remainder):
    """Build wheels, download test artifacts into tmp/test, and run
    //ci:report on the tmp working directory."""
    workdir = pathlib.Path('tmp').resolve()
    make_all_wheels(workdir)
    download_test_artifacts('tmp/test/**/*')
    report_cmd = ['bazelisk', 'run', '//ci:report', '--',
                  '--pipeline', args.pipeline, '--annotate', str(workdir)]
    report_cmd += remainder
    # The report tool's stderr is noisy; discard it.
    util.check_call(report_cmd, stderr=subprocess.DEVNULL)
示例#13
0
 def fetch_versioned_accession_id(vaccid):  # e.g., "NC_004325.2"
     """Download the FASTA for one versioned accession id.

     Returns the local output filename "<vaccid>.fa". The download is skipped
     when the file already exists; on any failure the partial output file is
     removed before the exception is re-raised.
     """
     output_file = f"{vaccid}.fa"
     if os.path.isfile(output_file):
         print(f"{output_file} already exists, nice")
     else:
         try:
             # NOTE(review): command is a shell-style string; assumes this
             # project's check_call accepts strings — confirm its definition.
             command = f"ncbi-acc-download --format fasta {vaccid} -e all"
             check_call(command)
         except:  # bare except keeps cleanup-on-any-failure; error re-raised below
             remove_safely(output_file)
             raise
     return output_file
示例#14
0
    def run(self):
        """Run unit tests for all subpackages and for this package's Python
        test modules (legacy Python 2 distutils command)."""
        failed = False
        if self.distribution.subpackages != None:
            # Forward whatever command-line args followed 'setup.py' to each
            # subpackage's own test run.
            for idx in range(len(sys.argv)):
                if 'setup.py' in sys.argv[idx]:
                    break
            argv = list(sys.argv[idx+1:])
            build = self.get_finalized_command('build')
            failed = process_subpackages(build.distribution.parallel_build,
                                         'test', build.build_base,
                                         self.distribution.subpackages,
                                         argv, False)

        ## PYTHON
        if self._has_python_tests():
            self.run_command('build')
            build = self.get_finalized_command('build')
            build_dir = build.build_base
            environ = self.distribution.environment

            # Directories added to the generated test scripts' search paths.
            pkg_dirs = [build_dir, build.build_lib,
                        os.path.join(build_dir, 'python')]
            lib_dirs = [build.build_temp]
            try:
                lib_dirs += environ['PATH']
                # FIXME need boost, etc dlls for windows
            except:
                pass
            try:
                # Optional MinGW/MSYS tool dirs (Windows builds only).
                lib_dirs.append(os.path.join(environ['MINGW_DIR'], 'bin'))
                lib_dirs.append(os.path.join(environ['MSYS_DIR'], 'bin'))
                lib_dirs.append(os.path.join(environ['MSYS_DIR'], 'lib'))
            except:
                pass
            postfix = '.'.join(build.build_temp.split('.')[1:])

            for pkg, units in self._get_python_tests():
                # Copy test sources into a per-package dir and generate an
                # __init__.py that exports the unit-test module names.
                test_dir = os.path.join(build_dir, 'test_' + pkg)
                if not os.path.exists(test_dir):
                    util.copy_tree('test', test_dir, excludes=['.svn*', 'CVS*'])
                f = open(os.path.join(test_dir, '__init__.py'), 'w')
                f.write("__all__ = ['" +
                        "', '".join(units) + "']\n")
                f.close()
                outfile = os.path.join(build_dir, 'test_' + pkg + '.py')
                util.create_testscript('test_' + pkg, units, outfile, pkg_dirs)
                wrap = util.create_test_wrapper(outfile, build_dir, lib_dirs)
                log.info('Python unit tests for ' + pkg)
                try:
                    util.check_call([wrap])
                # Python 2 except syntax: a failing package is recorded but
                # does not stop the remaining test packages.
                except Exception, e:
                    failed = True
                    print e
示例#15
0
def cmd_report(args, remainder):
    """Pack wheels into 'tmp', then invoke //ci:report on the build archive."""
    cmd_pack('tmp')

    archive_dir = os.path.join(args.root, args.pipeline, args.build_id)
    report_cmd = ['bazelisk', 'run', '//ci:report', '--',
                  '--pipeline', args.pipeline, '--annotate', archive_dir]
    report_cmd += remainder
    # The report tool's stderr is noisy; discard it.
    util.check_call(report_cmd, stderr=subprocess.DEVNULL)
示例#16
0
def cmd_report(args, remainder):
    """Make wheels, fetch test artifacts, and run //ci:report on tmp/ using a
    dedicated bazel output base."""
    workdir = pathlib.Path('tmp').resolve()
    make_all_wheels(workdir)
    download_test_artifacts('tmp/test/**/*')
    startup_args = ['--output_base={}'.format(output_base())]
    report_cmd = ['bazelisk'] + startup_args + ['run', '//ci:report', '--',
                                                '--pipeline', args.pipeline,
                                                '--annotate', str(workdir)]
    report_cmd += remainder
    util.check_call(report_cmd)
示例#17
0
def cmd_build(args, remainder):
    """Run bazelisk tests for the variant, upload wheels to Buildkite, and
    archive pkg.tar.gz, with a bazel shutdown before and after."""
    util.printf('--- :snake: pre-build steps... ')
    util.printf('bazel shutdown...')
    util.check_output(['bazelisk', 'shutdown'])
    util.printf('delete any old whl files...')
    for pkg in ('plaidml', 'plaidbench', 'plaidml/keras'):
        wheel_clean(pkg)

    common_args = [
        '--config={}'.format(args.variant),
        '--define=version={}'.format(args.version),
        '--explain={}'.format(os.path.abspath('explain.log')),
        '--verbose_failures',
        '--verbose_explanations',
    ]

    util.printf('--- :bazel: Running Build ...')

    env = None
    if platform.system() == 'Windows':
        # Windows builds need git symlinks and a conda environment.
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env = os.environ.copy()
        env.update(cenv.env())

    util.check_call(['bazelisk', 'test', '...'] + common_args,
                    env=env,
                    stderr=subprocess.DEVNULL)
    archive_dir = os.path.join(args.root, args.pipeline, args.build_id,
                               'build', args.variant)

    util.printf('--- :buildkite: Uploading artifacts...')
    # Upload the wheels from each package's wheel directory in turn.
    for pkg in ('plaidml', 'plaidbench', 'plaidml/keras'):
        util.check_call(['buildkite-agent', 'artifact', 'upload', '*.whl'],
                        cwd=wheel_path(pkg).resolve())
    os.makedirs(archive_dir, exist_ok=True)
    shutil.copy(os.path.join('bazel-bin', 'pkg.tar.gz'), archive_dir)
    util.printf('bazel shutdown...')
    util.check_output(['bazelisk', 'shutdown'])
示例#18
0
def run_af4(
    sample_name: str,
    cached_file_pairs: List[util.FASTQPair],
    cosmic_fusion_path: str,
    args: Any,
):
    """Run AF4 in denovo and targeted modes over the cached FASTQ pairs.

    Results land in ``{args.result_dir}/{sample_name}-{mode}``; a run is
    skipped when its filtered.fa output already exists.
    """
    ref_path = "s3://grail-publications/resources/gencode.v26.whole_genes.fa"
    util.s3_cache_files([ref_path, cosmic_fusion_path], args.cache_dir)

    cached_r1 = ",".join([
        args.cache_dir + "/" + os.path.basename(fp.r1)
        for fp in cached_file_pairs
    ])
    cached_r2 = ",".join([
        args.cache_dir + "/" + os.path.basename(fp.r2)
        for fp in cached_file_pairs
    ])
    for mode in ["denovo", "targeted"]:
        result_dir = args.result_dir + "/" + os.path.basename(sample_name +
                                                              "-" + mode)
        if os.path.exists(result_dir + "/filtered.fa"):
            logging.info("Skipping benchmark: %s", result_dir)
            continue
        logging.info("Start af4 benchmark: %s", result_dir)
        try:
            os.makedirs(result_dir, 0o755)
        except OSError:  # narrowed from a bare except; dir may already exist
            logging.error("mkdir %s failed", result_dir)
        # f-prefixes dropped from constant flags (no placeholders).
        af4_args = [
            str(util.af4_path()),
            f"-log_dir={result_dir}",
            "-pprof=:12345",
            "-mutex-profile-rate=1000",
            "-block-profile-rate=1000",
            f"-r1={cached_r1}",
            f"-r2={cached_r2}",
            "-max-genes-per-kmer=2",
            "-max-proximity-distance=1000",
            "-max-proximity-genes=5",
            f"-fasta-output={result_dir}/all.fa",
            f"-filtered-output={result_dir}/filtered.fa",
            f"-transcript={args.cache_dir}/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa",
        ]
        if mode == "targeted":
            af4_args.append(
                f"-cosmic-fusion={args.cache_dir}/all_pair_art_lod_gpair_merged.txt"
            )
        util.check_call(af4_args)
        logging.info("Finished benchmark: %s", result_dir)
        logging.info("Runtime stats: %s", util.run_stats(Path(result_dir)))
示例#19
0
 def fetch_all():
     """Download every genome's accession FASTAs, concatenate them into the
     genome's file, and record each genome's size in bases."""
     for g in Genome.all.values():
         remove_safely(g.filename)
         accession_fas = []
         for f in (g.versioned_accession_ids):
             af = Genome.fetch_versioned_accession_id(f)
             accession_fas.append(af)
         # Stitch the per-accession FASTAs together with a shell `cat`.
         accession_fastas = " ".join(accession_fas)
         command = f"cat {accession_fastas} > {g.filename}"
         check_call(command)
         assert os.path.isfile(g.filename), f"Failed to download genome {g.filename}"
         # Count bases: drop FASTA headers, strip newlines, and take the byte
         # count (third field of `wc` output) written to "<key>.size".
         command = f"grep -v '^>' {g.filename} | tr -d '\n' | wc > {g.key}.size"
         check_call(command)
         with open(f"{g.key}.size") as f:
             line = f.readline().rstrip()
             g.size = int(line.split()[2])
         print(f"Genome {g.key} size {g.size} bases.")
示例#20
0
def run_af4(sample_name: str, cached_file_pairs: List[util.FASTQPair],
            args: Any):
    """Run AF4 in denovo and targeted modes over the cached FASTQ pairs.

    Results go to ``{args.result_dir}/{sample_name}-{mode}``; runs whose
    filtered.fa already exists are skipped.
    """
    ref_path = "s3://grail-publications/resources/gencode.v26.whole_genes.fa"
    cosmic_fusion_path = "s3://grail-publications/resources/all_pair_art_lod_gpair_merged.txt"
    util.s3_cache_files([ref_path, cosmic_fusion_path], args.cache_dir)

    cached_r1 = ",".join([
        args.cache_dir + '/' + os.path.basename(fp.r1)
        for fp in cached_file_pairs
    ])
    cached_r2 = ",".join([
        args.cache_dir + '/' + os.path.basename(fp.r2)
        for fp in cached_file_pairs
    ])
    for mode in ['denovo', 'targeted']:
        result_dir = args.result_dir + '/' + os.path.basename(sample_name +
                                                              '-' + mode)
        if os.path.exists(result_dir + "/filtered.fa"):
            logging.info('Skipping benchmark: %s', result_dir)
            continue
        logging.info('Start af4 benchmark: %s', result_dir)
        try:
            os.makedirs(result_dir, 0o755)
        except OSError:  # narrowed from a bare except; dir may already exist
            logging.error("mkdir %s failed", result_dir)
        # f-prefixes dropped from constant flags (no placeholders).
        af4_args = [
            str(util.af4_path()), f'-log_dir={result_dir}', '-pprof=:12345',
            '-mutex-profile-rate=1000', '-block-profile-rate=1000',
            '-umi-in-read', f'-r1={cached_r1}', f'-r2={cached_r2}',
            f'-fasta-output={result_dir}/all.fa',
            f'-filtered-output={result_dir}/filtered.fa',
            f'-transcript={args.cache_dir}/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa'
        ]
        if mode == 'targeted':
            af4_args.append(
                f'-cosmic-fusion={args.cache_dir}/all_pair_art_lod_gpair_merged.txt'
            )
        util.check_call(af4_args)
        logging.info('Finished benchmark: %s', result_dir)
        logging.info("Runtime stats: %s", util.run_stats(Path(result_dir)))
示例#21
0
def cmd_pack(arg):
    """Download all wheel artifacts into *arg*, sort them into per-platform
    directories, and upload a combined all_wheels.tar.gz artifact.

    Args:
        arg: Working directory (created if missing) to pack wheels in.
    """
    pathlib.Path(arg).mkdir(parents=True, exist_ok=True)
    dir_list = ['windows_x86_64', 'macos_x86_64', 'linux_x86_64', 'common']
    whl_type = ['win_amd64', 'macosx', 'manylinux', 'none-any']

    prev_cwd = os.getcwd()
    os.chdir(arg)
    try:
        print('downloading wheels...')
        util.check_output(
            ['buildkite-agent', 'artifact', 'download', '*.whl', '.'], cwd='.')
        whl_files = list(pathlib.Path('.').glob('*.whl'))

        # Plain loop instead of the original side-effecting list comprehension.
        for d in dir_list:
            pathlib.Path(d).mkdir(parents=True, exist_ok=True)

        # Route each wheel into the platform directory matching its tag.
        for whl, dest in zip(whl_type, dir_list):
            for f in whl_files:
                if whl in str(f):
                    shutil.move(str(f), dest)

        print('packing all_wheels...')
        make_tarfile('all_wheels.tar.gz', '.')
        util.check_call(
            ['buildkite-agent', 'artifact', 'upload', 'all_wheels.tar.gz'])
    finally:
        # Restore the original working directory even on failure; the original
        # os.chdir('..') also assumed *arg* was a single-level relative path.
        os.chdir(prev_cwd)
示例#22
0
def main() -> None:
    """Run AF4 on simulated fusion samples in denovo and targeted modes and
    print per-sample true/false positive/negative counts as a LaTeX row."""
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s:%(levelname)s: %(message)s")

    p = argparse.ArgumentParser()
    p.add_argument('--cache_dir',
                   default=util.DEFAULT_CACHE_DIR,
                   help='Benchmark cache dir')
    p.add_argument('--result_dir',
                   default=util.DEFAULT_RESULT_DIR,
                   help='Benchmark result dir')
    p.add_argument(
        '--rerun_af4',
        action='store_true',
        help='Always run AF4 even if the result file already exists')
    p.add_argument(
        '--recache_files',
        action='store_true',
        help=
        'Always copy benchmark data files, even if they already exist locally.'
    )
    args = p.parse_args()
    util.s3_cache_files([
        util.REFERENCE_DIR +
        '/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa',
        util.REFERENCE_DIR + '/all_pair_art_lod_gpair_merged.txt',
        util.REFERENCE_DIR + '/liu_gpair.txt'
    ], args.cache_dir)
    for mode in ['denovo', 'targeted']:
        for sample in util.SIMULATED_SAMPLES:
            util.s3_cache_files([sample.path.r1, sample.path.r2],
                                args.cache_dir)
            result_dir = f'{args.result_dir}/synthetic-{mode}-{sample.n}-{sample.coverage}'
            try:
                os.makedirs(result_dir, 0o755)
            except OSError:  # narrowed from a bare except; dir may already exist
                logging.error("mkdir %s failed", result_dir)
            if not os.path.exists(
                    f'{result_dir}/filtered.fa') or args.rerun_af4:
                logging.info('running benchmark in %s', result_dir)
                # NOTE(review): sibling scripts pass os.path.basename(...) for
                # -r1/-r2; confirm sample.path.r1/r2 are bare filenames here.
                af4_args = [
                    str(util.af4_path()), f'-log_dir={result_dir}',
                    f'-r1={args.cache_dir}/{sample.path.r1}',
                    f'-r2={args.cache_dir}/{sample.path.r2}',
                    f'-fasta-output={result_dir}/all.fa',
                    f'-filtered-output={result_dir}/filtered.fa',
                    '-transcript=' + args.cache_dir +
                    '/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa'
                ]
                if mode == 'targeted':
                    af4_args.append('-cosmic-fusion=' + args.cache_dir +
                                    '/all_pair_art_lod_gpair_merged.txt')
                util.check_call(af4_args)
                logging.info("Runtime stats: %s",
                             util.run_stats(Path(result_dir)))

            stats = TargetedFusionStats(
                Path(f'{args.cache_dir}/liu_gpair.txt'),
                Path(f'{result_dir}/filtered.fa'))

            s = stats.stats()
            tp = "%d" % (s.tp, )
            fp = "%d" % (s.fp, )
            fn = "%d" % (s.fn, )
            print(
                f'{mode} & {sample.n} & {sample.coverage} & {tp} & {fp} & {fn}\\\\'
            )
示例#23
0
def concatenate_fasta(genomes, genomes_file):
    """Concatenate every genome's FASTA file into *genomes_file* via `cat`."""
    fasta_list = " ".join(g.filename for g in genomes)
    check_call(f"cat {fasta_list} > {genomes_file}")
示例#24
0
def run_starfusion(sample_name: str, cached_file_pairs: List[util.FASTQPair],
                   args: Any):
    """Run the STAR-Fusion docker pipeline over the cached FASTQ pairs.

    Extracts (and builds) the STAR-Fusion source and plug-n-play reference
    trees under args.starfusion_data_dir on first use, then runs the
    trinityctat/ctatfusion container, writing results to
    ``{args.result_dir}/{sample_name}-starfusion``. Failures of the container
    are logged and ignored.
    """
    match = re.match(r'.*/([^/]+)\.FULL\.tar\.gz$', args.starfusion_targz)
    assert match
    local_starfusion_dir = match[1]

    logging.info("LOCAL: %s", local_starfusion_dir)
    if not os.path.exists(
            os.path.join(args.starfusion_data_dir, local_starfusion_dir)):
        util.check_call([
            'tar', 'xzf', args.starfusion_targz, '-C', args.starfusion_data_dir
        ])
        util.check_call([
            'make', '-C',
            os.path.join(args.starfusion_data_dir, local_starfusion_dir)
        ])

    match = re.match(r'.*/([^/]+)\.tar\.gz$',
                     args.starfusion_plug_n_play_targz)
    assert match
    local_plugnplay_dir = match[1]
    # BUG FIX: the original concatenated data_dir + dirname without a path
    # separator, so this existence check always failed and the tarball was
    # re-extracted on every run.
    if not os.path.exists(
            os.path.join(args.starfusion_data_dir, local_plugnplay_dir)):
        util.check_call([
            'tar', 'xzf', args.starfusion_plug_n_play_targz, '-C',
            args.starfusion_data_dir
        ])

    result_dir = args.result_dir + '/' + os.path.basename(sample_name +
                                                          '-starfusion')
    logging.info('Start starfusion benchmark: %s', result_dir)
    try:
        os.makedirs(result_dir, 0o755)
    except OSError:  # narrowed from a bare except; dir may already exist
        logging.error("mkdir %s failed", result_dir)

    cached_r1 = ",".join([
        args.cache_dir + '/' + os.path.basename(fp.r1)
        for fp in cached_file_pairs
    ])
    cached_r2 = ",".join([
        args.cache_dir + '/' + os.path.basename(fp.r2)
        for fp in cached_file_pairs
    ])

    starfusion_args = ['docker', 'run']
    mounted: Set[str] = set()
    # Renamed from `dir` to avoid shadowing the builtin.
    for mount_dir in [args.starfusion_data_dir, args.result_dir,
                      args.cache_dir]:
        if mount_dir not in mounted:
            mounted.add(mount_dir)
            starfusion_args += ['-v', f'{mount_dir}:{mount_dir}']
    # BUG FIX: os.path.join discards all preceding components when a later
    # component is absolute, so '/STAR-Fusion' and '/ctat_genome_lib_build_dir'
    # produced wrong container paths; the leading slashes are removed.
    starfusion_args += [
        '--rm', 'trinityctat/ctatfusion',
        os.path.join(args.starfusion_data_dir, local_starfusion_dir,
                     'STAR-Fusion'), '--left_fq', cached_r1, '--right_fq',
        cached_r2, '--CPU', '56', '--genome_lib_dir',
        os.path.join(args.starfusion_data_dir, local_plugnplay_dir,
                     'ctat_genome_lib_build_dir'), '-O', result_dir,
        '--FusionInspector', 'validate'
    ]
    try:
        util.check_call(starfusion_args)
    except Exception as e:
        # Best-effort: starfusion failures are logged but do not abort.
        logging.error("Starfusion failed (ignoring): %s", e)
    logging.info('Finished starfusion benchmark: %s', result_dir)
示例#25
0
def main() -> None:
    """Benchmark AF4 (and optionally STAR-Fusion) on BRCA and cfRNA samples."""
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s:%(levelname)s: %(message)s")
    p = argparse.ArgumentParser()
    p.add_argument("--cache_dir",
                   default=util.DEFAULT_CACHE_DIR,
                   help="Benchmark cache dir")
    p.add_argument("--result_dir",
                   default=util.DEFAULT_RESULT_DIR,
                   help="Benchmark result dir")
    p.add_argument(
        "--starfusion_data_dir",
        default="/scratch-nvme/starfusion",
        help="Directory for expanding starfusion plug-n-play files",
    )
    p.add_argument(
        "--run",
        action="append",
        choices=["af4", "starfusion"],
        help="List of systems to run. If unset, run all the configured systems",
    )
    p.add_argument(
        "--starfusion_plug_n_play_targz",
        default=os.environ["HOME"] +
        "/GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz",
        help=
        "Tar.gz file of starfusion plug-n-play file. https://github.com/STAR-Fusion/STAR-Fusion/wiki#data-resources-required",
    )
    p.add_argument(
        "--starfusion_targz",
        default=os.environ["HOME"] + "/STAR-Fusion-v1.5.0.FULL.tar.gz",
        help=
        "Tar.gz file of starfusion source package. https://github.com/STAR-Fusion/STAR-Fusion/wiki#data-resources-required",
    )
    p.add_argument(
        "--brca_data_dir",
        default="/scratch-nvme/xyang/brca_rnaseq_data",
        help="BT474, KPL4, MCF7, SKBR3 Breast cancer data directory",
    )

    args = p.parse_args()
    if not args.run:
        args.run = ["af4", "starfusion"]

    ## brca rna-seq for af4
    brca_samples = [
        os.path.join(args.brca_data_dir, s)
        for s in ["BT474", "KPL4", "MCF7", "SKBR3"]
    ]
    for s in brca_samples:
        # BUG FIX: s is already a full path; the original joined it with
        # brca_data_dir a second time. The downloader now also honors
        # --brca_data_dir instead of a hard-coded directory.
        if not os.path.exists(s):
            util.check_call([
                "download_brca_data.py",
                "--odir",
                args.brca_data_dir,
            ])

    cosmic_fusion_path = (
        "s3://grail-publications/2019-ISMB/references/all_art_lod_brca.txt")
    for sample in brca_samples:
        # Pair each *_1 FASTQ with its *_2 mate.
        r1s: List[str] = []
        for fq in os.listdir(sample):
            if "_1" in fq:
                r1s.append(os.path.join(sample, fq))
        cached_file_pairs: List[util.FASTQPair] = []
        for r1 in r1s:
            assert os.path.exists(r1.replace("_1", "_2"))
            cached_file_pairs.append(
                util.FASTQPair(r1=r1, r2=r1.replace("_1", "_2")))
        print(os.path.basename(sample))
        print(cached_file_pairs)

        run_af4(os.path.basename(sample), cached_file_pairs,
                cosmic_fusion_path, args)

    ## cfrna for af4 and starfusion
    cosmic_fusion_path = (
        "s3://grail-publications/2019-ISMB/references/all_pair_art_lod_gpair_merged.txt"
    )
    for sample in util.RNA_SAMPLES:
        fastq_files: List[str] = []
        cached_file_pairs: List[util.FASTQPair] = []
        for fp in sample.paths:
            assert fp.r1.replace("R1", "R2") == fp.r2, fp.r2
            fastq_files += [fp.r1, fp.r2]
            cached_file_pairs.append(
                util.FASTQPair(
                    r1=args.cache_dir + "/" + os.path.basename(fp.r1),
                    r2=args.cache_dir + "/" + os.path.basename(fp.r2),
                ))
        util.s3_cache_files(fastq_files, args.cache_dir)

        if "af4" in args.run:
            run_af4(sample.name, cached_file_pairs, cosmic_fusion_path, args)
        if "starfusion" in args.run:
            run_starfusion(sample.name, cached_file_pairs, args)
示例#26
0
def run_starfusion(sample_name: str, cached_file_pairs: List[util.FASTQPair],
                   args: Any):
    """Run the STAR-Fusion docker pipeline for one sample.

    On first use, extracts the STAR-Fusion source tarball (and builds it with
    make) and the plug-n-play genome-library tarball under
    args.starfusion_data_dir.  Then runs the trinityctat/ctatfusion docker
    image over the cached FASTQ pairs, writing results to
    {args.result_dir}/{sample_name}-starfusion.

    Args:
        sample_name: sample identifier; names the result directory.
        cached_file_pairs: FASTQ R1/R2 pairs already present in args.cache_dir.
        args: parsed flags; reads starfusion_targz,
            starfusion_plug_n_play_targz, starfusion_data_dir, result_dir,
            and cache_dir.

    A STAR-Fusion failure is logged and ignored so later samples still run.
    """
    match = re.match(r".*/([^/]+)\.FULL\.tar\.gz$", args.starfusion_targz)
    assert match
    local_starfusion_dir = match[1]

    logging.info("LOCAL: %s", local_starfusion_dir)
    if not os.path.exists(
            os.path.join(args.starfusion_data_dir, local_starfusion_dir)):
        util.check_call([
            "tar", "xzf", args.starfusion_targz, "-C", args.starfusion_data_dir
        ])
        util.check_call([
            "make", "-C",
            os.path.join(args.starfusion_data_dir, local_starfusion_dir)
        ])

    match = re.match(r".*/([^/]+)\.tar\.gz$",
                     args.starfusion_plug_n_play_targz)
    assert match
    local_plugnplay_dir = match[1]
    # BUGFIX: the original concatenated the two path components with no
    # separator ("dirname" + "basename"), so this existence check always
    # failed and the reference tarball was re-extracted on every run.  Use
    # os.path.join, matching the starfusion-dir check above.
    if not os.path.exists(
            os.path.join(args.starfusion_data_dir, local_plugnplay_dir)):
        util.check_call([
            "tar",
            "xzf",
            args.starfusion_plug_n_play_targz,
            "-C",
            args.starfusion_data_dir,
        ])

    result_dir = args.result_dir + "/" + os.path.basename(sample_name +
                                                          "-starfusion")
    logging.info("Start starfusion benchmark: %s", result_dir)
    try:
        os.makedirs(result_dir, 0o755)
    except OSError:  # narrowed from bare except: mkdir failure is best-effort
        logging.error("mkdir %s failed", result_dir)

    # Comma-separated R1/R2 lists as STAR-Fusion expects for multi-lane input.
    cached_r1 = ",".join([
        args.cache_dir + "/" + os.path.basename(fp.r1)
        for fp in cached_file_pairs
    ])
    cached_r2 = ",".join([
        args.cache_dir + "/" + os.path.basename(fp.r2)
        for fp in cached_file_pairs
    ])

    starfusion_args = ["docker", "run"]
    # Bind-mount each distinct host directory into the container at the same
    # path, deduplicating in case e.g. cache_dir == result_dir.
    mounted: Set[str] = set()
    for mount_dir in [args.starfusion_data_dir, args.result_dir,
                      args.cache_dir]:
        if mount_dir not in mounted:
            mounted.add(mount_dir)
            starfusion_args += ["-v", f"{mount_dir}:{mount_dir}"]
    starfusion_args += [
        "--rm",
        "trinityctat/ctatfusion",
        os.path.join(args.starfusion_data_dir, local_starfusion_dir,
                     "STAR-Fusion"),
        "--left_fq",
        cached_r1,
        "--right_fq",
        cached_r2,
        "--CPU",
        "56",
        "--genome_lib_dir",
        os.path.join(args.starfusion_data_dir, local_plugnplay_dir,
                     "ctat_genome_lib_build_dir"),
        "-O",
        result_dir,
        "--FusionInspector",
        "validate",
    ]
    try:
        util.check_call(starfusion_args)
    except Exception as e:
        # Deliberate best-effort: record the failure but keep the benchmark
        # harness running for the remaining samples.
        logging.error("Starfusion failed (ignoring): %s", e)
    logging.info("Finished starfusion benchmark: %s", result_dir)
示例#27
0
def buildkite_upload(pattern, **kwargs):
    """Upload build artifacts matching *pattern* with the buildkite-agent CLI.

    Any extra keyword arguments are forwarded to util.check_call.
    """
    cmd = ['buildkite-agent', 'artifact', 'upload']
    cmd.append(pattern)
    util.check_call(cmd, **kwargs)
示例#28
0
                    unit.include_dirs.append(env['CUNIT_INCLUDE_DIR'])
                    unit.libraries.append(env['CUNIT_LIBRARIES'])
                    unit.library_dirs.append(env['CUNIT_LIB_DIR'])
                    self.distribution.native_executables.append(unit)

            ## build w/ distutils thru backdoor
            cmd_obj = self.get_command_obj('build_exe')
            cmd_obj.ensure_finalized()
            cmd_obj.run()
            self.distribution.native_executables = orig_exes

            for pkg, units in self._get_c_tests():
                log.info('C unit tests for ' + pkg)
                for unit in units:
                    try:
                        util.check_call([os.path.join(lib_dir, unit.name)])
                    except Exception, e:
                        failed = True
                        print e

        ## C++
        if self._has_cpp_tests():
            sys.std_err.write("C++ unit testing is untested!") #FIXME

            from configure import cppunit
            env = dict()
            if not cppunit.is_installed(env, None):
                cppunit.install(env, None)

            orig_exes = self.distribution.native_executables
示例#29
0
def buildkite_download(pattern, destination, **kwargs):
    """Download artifacts matching *pattern* into *destination* using the
    buildkite-agent CLI.

    Any extra keyword arguments are forwarded to util.check_call.
    """
    cmd = ['buildkite-agent', 'artifact', 'download']
    cmd.append(pattern)
    cmd.append(destination)
    util.check_call(cmd, **kwargs)
示例#30
0
def cmd_build(args, remainder):
    """Configure, build, package, and test one CI build variant.

    Loads the variant definition from ci/plan.yml, overlays its environment
    on os.environ, runs configure and ninja, builds the release package, runs
    the variant's check-* target, smoke-tests the packaged devkit with a
    standalone cmake build, and finally uploads the wheel/tarball artifacts
    for non-debug variants.

    Args:
        args: parsed flags; reads args.variant.
        remainder: unused; accepted for command-dispatch signature parity.
    """
    with open('ci/plan.yml') as file_:
        plan = yaml.safe_load(file_)

    # Overlay the variant's env settings on top of the current environment.
    env = os.environ.copy()
    variant = plan['VARIANTS'][args.variant]
    for key, value in variant.get('env', {}).items():
        env[key] = str(value)

    build_root = variant.get('build_root', 'build-x86_64')
    build_type = variant.get('build_type', 'Release')
    check = variant.get('check', 'smoke')
    system = variant.get('system', 'Linux')

    # BUGFIX: os.getenv() returns None when BUILDKITE_AGENT_NAME is unset,
    # which made Path('/tmp') / None raise a cryptic TypeError.  Indexing
    # os.environ fails with a clear KeyError naming the missing variable.
    temp_dir = Path('/tmp') / os.environ['BUILDKITE_AGENT_NAME']
    build_dir = Path(build_root) / build_type
    logs_dir = Path('logs').resolve()
    logs_dir.mkdir(parents=True, exist_ok=True)

    util.printf('--- :building_construction: configure')
    # Tee the (verbose) configure output into a log artifact instead of the
    # build console; stderr is folded into the same file.
    configure_log = logs_dir / 'configure.log'
    with configure_log.open('wb') as fp:
        util.check_call([
            'python', 'configure', '--ci', f'--temp={temp_dir}',
            f'--type={build_type}'
        ],
                        env=env,
                        stdout=fp,
                        stderr=subprocess.STDOUT)

    util.printf('--- :hammer_and_wrench: ninja')
    util.check_call(['ninja', '-C', build_dir], env=env)

    util.printf('--- :hammer_and_wrench: ninja package')
    util.check_call(['ninja', '-C', build_dir, 'package'], env=env)

    util.printf(f'--- :hammer_and_wrench: ninja check-{check}')
    check_log = logs_dir / f'check-{check}.log'
    with check_log.open('wb') as fp:
        util.check_call(['ninja', '-C', build_dir, f'check-{check}'],
                        env=env,
                        stdout=fp,
                        stderr=subprocess.STDOUT)

    # Smoke-test the devkit shipped inside the CPack staging tree by building
    # and running its edsl_test with a standalone cmake/ninja build.
    util.printf('--- Test devkit')
    devkit_dir = build_dir / '_CPack_Packages' / system / 'TGZ' / f'PlaidML-1.0.0-{system}' / 'devkit'
    devkit_build_dir = devkit_dir / 'build'
    cmd = ['cmake']
    cmd += ['-S', devkit_dir]
    cmd += ['-B', devkit_build_dir]
    cmd += ['-G', 'Ninja']
    util.check_call(cmd, env=env)
    util.check_call(['ninja', '-C', devkit_build_dir], env=env)
    util.check_call([devkit_build_dir / 'edsl_test'], env=env)

    # Debug builds are not published; everything else uploads its artifacts.
    if 'dbg' not in args.variant:
        util.buildkite_upload(build_dir / '*.whl')
        util.buildkite_upload(build_dir / '*.tar.gz')
示例#31
0
def main() -> None:
    """Run the simulated-data AF4 benchmark and print a LaTeX results table.

    Caches the reference transcriptome and truth files locally, then for each
    (mode, sample) pair runs AF4 (skipped when results already exist unless
    --rerun_af4 is given) and prints tp/fp/fn counts against the liu_gpair
    truth set, one LaTeX table row per run.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s:%(levelname)s: %(message)s")

    p = argparse.ArgumentParser()
    p.add_argument("--cache_dir",
                   default=util.DEFAULT_CACHE_DIR,
                   help="Benchmark cache dir")
    p.add_argument("--result_dir",
                   default=util.DEFAULT_RESULT_DIR,
                   help="Benchmark result dir")
    p.add_argument(
        "--rerun_af4",
        action="store_true",
        help="Always run AF4 even if the result file already exists",
    )
    # NOTE(review): --recache_files is parsed but not referenced in this
    # function; presumably consumed inside util.s3_cache_files — confirm.
    p.add_argument(
        "--recache_files",
        action="store_true",
        help=
        "Always copy benchmark data files, even if they already exist locally.",
    )
    args = p.parse_args()
    # Pre-fetch the padded transcriptome FASTA and the two truth/target lists
    # into the local cache before any benchmark run.
    util.s3_cache_files(
        [
            util.REFERENCE_DIR +
            "/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa",
            util.REFERENCE_DIR + "/all_pair_art_lod_gpair_merged.txt",
            util.REFERENCE_DIR + "/liu_gpair.txt",
        ],
        args.cache_dir,
    )
    # "denovo" runs AF4 without a fusion-pair list; "targeted" restricts it
    # to the cosmic/art pair list via -cosmic-fusion below.
    for mode in ["denovo", "targeted"]:
        for sample in util.SIMULATED_SAMPLES:
            util.s3_cache_files([sample.path.r1, sample.path.r2],
                                args.cache_dir)
            result_dir = (
                f"{args.result_dir}/synthetic-{mode}-{sample.n}-{sample.coverage}"
            )
            try:
                os.makedirs(result_dir, 0o755)
            # NOTE(review): bare except silently swallows every error here,
            # not just "directory exists"; likely intends OSError — confirm.
            except:
                logging.error("mkdir %s failed", result_dir)
            # filtered.fa is the final AF4 output; its presence marks a
            # completed prior run.
            if not os.path.exists(
                    f"{result_dir}/filtered.fa") or args.rerun_af4:
                logging.info("running benchmark in %s", result_dir)
                af4_args = [
                    str(util.af4_path()),
                    f"-log_dir={result_dir}",
                    # NOTE(review): other code in this file joins cache_dir
                    # with os.path.basename(...) for cached inputs; here the
                    # full sample.path.r1/r2 (possibly an s3 URI) is appended
                    # directly — verify against util.s3_cache_files's layout.
                    f"-r1={args.cache_dir}/{sample.path.r1}",
                    f"-r2={args.cache_dir}/{sample.path.r2}",
                    f"-fasta-output={result_dir}/all.fa",
                    f"-filtered-output={result_dir}/filtered.fa",
                    f"-max-genes-per-kmer=2",
                    f"-max-proximity-distance=1000",
                    f"-max-proximity-genes=5",
                    "-transcript=" + args.cache_dir +
                    "/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa",
                ]
                if mode == "targeted":
                    af4_args.append("-cosmic-fusion=" + args.cache_dir +
                                    "/all_pair_art_lod_gpair_merged.txt")
                util.check_call(af4_args)
                logging.info("Runtime stats: %s",
                             util.run_stats(Path(result_dir)))

            # Score the (possibly pre-existing) filtered output against the
            # liu_gpair truth set.
            stats = TargetedFusionStats(
                Path(f"{args.cache_dir}/liu_gpair.txt"),
                Path(f"{result_dir}/filtered.fa"),
            )

            s = stats.stats()
            tp = "%d" % (s.tp, )
            fp = "%d" % (s.fp, )
            fn = "%d" % (s.fn, )
            # One LaTeX table row per benchmark run (note doubled backslash).
            print(
                f"{mode} & {sample.n} & {sample.coverage} & {tp} & {fp} & {fn}\\\\"
            )