def main():
    """Build and run a pipeline that creates a personal reference genome
    (olego ``create_genomes``) for each sample VCF, then copies the result
    to gs://macarthurlab-rnaseq/grch38_personal_reference/.
    """
    args = parse_args()

    # Local runs authenticate via a service-account key file; otherwise
    # submit to the Batch service under the given billing project.
    if args.local:
        backend = hp.LocalBackend(gsa_key_file=os.path.abspath("misc-270914-cb9992ec9b25.json"))
    else:
        backend = hp.BatchBackend(args.project)

    p = hp.Pipeline(backend=backend, name=args.name)

    # The reference fasta/index pair is loop-invariant — read it once.
    hg38_fasta = p.read_input_group(
        fa=os.path.expanduser("~/p1/ref/GRCh38/hg38.fa"),
        fai=os.path.expanduser("~/p1/ref/GRCh38/hg38.fa.fai"))

    for i, vcf in enumerate([
        "gs://macarthurlab-rnaseq/grch38_vcfs/RGP_273_3_R1.SNPs.vcf.gz",
        "gs://macarthurlab-rnaseq/grch38_vcfs/RGP_248_3.SNPs.vcf.gz",
        "gs://macarthurlab-rnaseq/grch38_vcfs/RGP_54_3_2.SNPs.vcf.gz",
        #"gs://macarthurlab-rnaseq/grch38_vcfs/RGP_7_1_2.SNPs.vcf.gz",
        #"gs://macarthurlab-rnaseq/grch38_vcfs/RGP_7_2_2.SNPs.vcf.gz",
        #"gs://macarthurlab-rnaseq/grch38_vcfs/RGP_7_3_2.SNPs.vcf.gz",
        #"gs://macarthurlab-rnaseq/grch38_vcfs/RGP_7_4_2.SNPs.vcf.gz",
        #"gs://macarthurlab-rnaseq/grch38_vcfs/RGP_7_5_2.SNPs.vcf.gz",
    ]):
        t = p.new_task(name=f"olego{i}")
        t.image("weisburd/olego:latest")

        # Switch from the batch service account to the user account so gsutil
        # can read/write the macarthurlab-rnaseq bucket.
        t.command("gcloud auth activate-service-account --key-file /gsa-key/key.json")
        t.command("gsutil -m cp -r gs://weisburd-misc/creds/.config /tmp/")
        t.command("rm -rf ~/.config")
        t.command("mv /tmp/.config ~/")
        t.command("gcloud config set account [email protected]")
        t.command("gcloud config set project seqr-project")

        # e.g. "RGP_273_3_R1" from ".../RGP_273_3_R1.SNPs.vcf.gz"
        output_dir = os.path.basename(vcf).replace(".SNPs.vcf.gz", "")

        # The trailing * also copies the .tbi index alongside the vcf.
        t.command(f"gsutil -m cp {vcf}* .")
        # BUG FIX: removed a stray trailing backtick that made this shell
        # command fail with an unterminated-backquote error.
        t.command(f"mkdir {output_dir}")
        t.command(f"create_genomes --fasta {hg38_fasta.fa} --vcf {os.path.basename(vcf)} --outdir {output_dir}")
        t.command(f"gsutil -m cp -r {output_dir} gs://macarthurlab-rnaseq/grch38_personal_reference/")

        p.run()
        # NOTE: deliberately stops after the first uncommented VCF — comment
        # entries in/out above to pick which sample is processed per run.
        break

    if isinstance(backend, hp.BatchBackend):
        backend.close()
def stress():
    """Submit a randomized stress-test batch: 100 parent tasks, each with 10
    children; roughly 20% of parents fail and 20% of children always-run."""
    p = pipeline.Pipeline(
        name='stress',
        backend=pipeline.BatchBackend(billing_project='hail'),
        default_image='ubuntu:18.04')

    for i in range(100):
        parent = p.new_task(name=f'parent_{i}')
        delay = random.choice(range(4))
        # ~1 in 5 parents exits non-zero to exercise failure handling.
        if flip(0.2):
            parent.command(f'sleep {delay}; exit 1')
        else:
            parent.command(f'sleep {delay}; echo parent {i}')

        for j in range(10):
            delay = random.choice(range(4))
            child = p.new_task(name=f'child_{i}_{j}')
            child.command(f'sleep {delay}; echo child {i} {j}')
            child.depends_on(parent)
            # ~1 in 5 children run even when their parent failed.
            if flip(0.2):
                child._always_run = True

    # Fire-and-forget: don't open the UI, don't block on completion.
    p.run(open=False, wait=False)
if __name__ == '__main__':
    # CLI: script.py DOCKER_IMAGE_URL BUCKET_BASE SHA N_REPLICATES N_ITERS
    if len(sys.argv) != 6:
        # Plain string — the original f-string had no placeholders.
        raise RuntimeError('usage: <script.py> DOCKER_IMAGE_URL BUCKET_BASE SHA N_REPLICATES N_ITERS')
    BENCHMARK_IMAGE = sys.argv[1]
    BUCKET_BASE = sys.argv[2]
    SHA = sys.argv[3]
    N_REPLICATES = int(sys.argv[4])
    N_ITERS = int(sys.argv[5])

    # Optionally suffix the SHA with a human-readable label,
    # e.g. "abc123-mybranch" when BENCHMARK_LABEL=mybranch.
    labeled_sha = SHA
    label = os.environ.get('BENCHMARK_LABEL')
    if label:
        labeled_sha = f'{labeled_sha}-{label}'

    p = pl.Pipeline(name=f'benchmark-{labeled_sha}',
                    backend=pl.BatchBackend(billing_project='hail'),
                    default_image=BENCHMARK_IMAGE,
                    default_storage='100G',
                    default_memory='7G',
                    default_cpu=2)

    # One task per resource group: build it, tar it (excluding Hadoop .crc
    # files), and expose the tarball as the task's output file.
    resource_tasks = {}
    for r in all_resources:
        t = p.new_task(f'create_resource_{r.name()}').cpu(4)
        t.command(f'hail-bench create-resources --data-dir benchmark-resources --group {r.name()}')
        t.command(f"time tar -cf {r.name()}.tar benchmark-resources/{r.name()} --exclude='*.crc'")
        t.command(f'ls -lh {r.name()}.tar')
        t.command(f'mv {r.name()}.tar {t.ofile}')
        resource_tasks[r] = t

    all_benchmarks = list_benchmarks()
    # Real exception instead of `assert`, which is stripped under `python -O`.
    if not all_benchmarks:
        raise RuntimeError('list_benchmarks() returned no benchmarks')
from hailtop.hailctl.dev.benchmark.run.utils import list_benchmarks

if __name__ == '__main__':
    # Expect exactly five positional arguments.
    if len(sys.argv) != 6:
        raise RuntimeError(
            f'usage: <script.py> DOCKER_IMAGE_URL BUCKET_BASE SHA N_REPLICATES N_ITERS'
        )
    BENCHMARK_IMAGE = sys.argv[1]
    BUCKET_BASE = sys.argv[2]
    SHA = sys.argv[3]
    N_REPLICATES = int(sys.argv[4])
    N_ITERS = int(sys.argv[5])

    p = pl.Pipeline(
        name='benchmark',
        backend=pl.BatchBackend(),
        default_image=BENCHMARK_IMAGE,
        default_storage='10G',
        default_memory='7G',
        default_cpu=2)

    # Single setup task: build the benchmark resources, archive them
    # (excluding Hadoop .crc files), and publish the tarball as the
    # task's output file.
    make_resources = p.new_task('create_resources').cpu(4)
    for shell_cmd in (
            'hailctl dev benchmark create-resources --data-dir benchmark-resources',
            "time tar -czf benchmark-resources.tar.gz benchmark-resources --exclude='*.crc'",
            'ls -lh benchmark-resources.tar.gz',
            f'mv benchmark-resources.tar.gz {make_resources.ofile}'):
        make_resources.command(shell_cmd)

    all_benchmarks = list_benchmarks()
from hailtop.hailctl.dev.benchmark.run.utils import list_benchmarks

if __name__ == '__main__':
    if len(sys.argv) != 6:
        raise RuntimeError(
            f'usage: <script.py> DOCKER_IMAGE_URL BUCKET_BASE SHA N_REPLICATES N_ITERS'
        )
    # Positional CLI arguments.
    BENCHMARK_IMAGE = sys.argv[1]
    BUCKET_BASE = sys.argv[2]
    SHA = sys.argv[3]
    N_REPLICATES = int(sys.argv[4])
    N_ITERS = int(sys.argv[5])

    # This variant targets the hosted batch service explicitly by URL.
    backend = pl.BatchBackend(url='https://batch.hail.is')
    p = pl.Pipeline(
        name='benchmark',
        backend=backend,
        default_image=BENCHMARK_IMAGE,
        default_storage='10G',
        default_memory='7G',
        default_cpu=2)

    # Single setup task: create the benchmark resources, archive them
    # (excluding Hadoop .crc files), and expose the tarball as output.
    make_resources = p.new_task('create_resources')
    make_resources.command('hailctl dev benchmark create-resources --data-dir benchmark-resources')
    make_resources.command("time tar -czvf benchmark-resources.tar.gz benchmark-resources --exclude='*.crc'")
    make_resources.command('ls -lh benchmark-resources.tar.gz')
    make_resources.command(f'mv benchmark-resources.tar.gz {make_resources.ofile}')

    all_benchmarks = list_benchmarks()