示例#1
0
def vcf_to_h5(vcf_fpath, out_h5_fpath, n_threads, preread_nvars, tmp_dir,
              kept_fields=None, ignored_fields=None):
    """Parse a VCF per chrom in a worker pool and merge the parts into one h5.

    The chroms returned by ``get_chroms_in_vcf`` are mapped over
    ``_parse_vcf`` in a ``Pool`` of ``n_threads`` workers. The paths returned
    by the workers are merged into ``out_h5_fpath`` by ``_merge_h5`` and are
    then handed to ``_remove_temp_chrom_h5s`` whether or not the merge
    succeeded.

    Args:
        vcf_fpath: Path of the input VCF; forwarded to the helpers.
        out_h5_fpath: Destination path given to ``_merge_h5``.
        n_threads: Number of workers requested from ``Pool``.
        preread_nvars: Forwarded to ``_get_max_field`` (presumably the number
            of variants scanned to size the fields -- confirm in the helper).
        tmp_dir: Directory for the temporary files; created when missing.
        kept_fields: Forwarded unchanged to ``_get_max_field`` and
            ``_parse_vcf``.
        ignored_fields: Forwarded unchanged to ``_get_max_field`` and
            ``_parse_vcf``.

    Raises:
        Exception: Whatever the helpers or the pool raise is propagated
            after the temporary files have been cleaned up.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir() and
    # makedirs also creates missing parent directories.
    os.makedirs(tmp_dir, exist_ok=True)

    chroms = get_chroms_in_vcf(vcf_fpath)
    max_field_lens, max_field_str_lens = _get_max_field(
        vcf_fpath, preread_nvars, kept_fields=kept_fields,
        ignored_fields=ignored_fields)

    partial_parse_vcf = partial(_parse_vcf, vcf_fpath=vcf_fpath,
                                tmp_dir=tmp_dir,
                                max_field_lens=max_field_lens,
                                max_field_str_lens=max_field_str_lens,
                                kept_fields=kept_fields,
                                ignored_fields=ignored_fields)
    try:
        with Pool(n_threads) as pool:
            h5_chroms_fpaths = pool.map(partial_parse_vcf, chroms)
    except Exception:
        # Clean up only after the ``with`` block has released the pool, so
        # that no worker can still be writing into tmp_dir (assuming Pool is
        # multiprocessing.Pool, whose __exit__ terminates the workers).
        remove_temp_file_in_dir(tmp_dir, '.tmp.h5')
        raise

    try:
        _merge_h5(h5_chroms_fpaths, out_h5_fpath)
    finally:
        # The per-chrom files are temporary: drop them on success and on
        # failure alike; any merge error still propagates.
        _remove_temp_chrom_h5s(h5_chroms_fpaths)
示例#2
0
def write_vcf_parallel(variations, out_fhand, n_threads, tmp_dir,
                       chunk_size=None, vcf_format=VCF_FORMAT):
    """Write ``variations`` as VCF into ``out_fhand`` using a worker pool.

    The meta section and the header are written first. The numbered chunks
    produced by ``_numbered_chunks`` are then mapped over
    ``_write_snvs_parallel`` in a ``Pool`` of ``n_threads`` workers; the
    paths returned by the workers are merged into ``out_fhand`` by
    ``_merge_vcfs`` and finally deleted, whether or not the merge succeeded.

    Args:
        variations: Variations object handed to the writer helpers.
        out_fhand: Output file handle that receives meta, header and the
            merged body.
        n_threads: Number of workers requested from ``Pool``.
        tmp_dir: Directory passed to the workers for their temporary files.
        chunk_size: Forwarded to ``_numbered_chunks``.
        vcf_format: Forwarded to ``_write_vcf_meta``.

    Raises:
        Exception: Whatever the helpers or the pool raise is propagated
            after the temporary files have been cleaned up.
    """
    _write_vcf_meta(variations, out_fhand, vcf_format=vcf_format)
    _write_vcf_header(variations, out_fhand)

    grouped_paths = _group_variations_paths(variations)
    _partial_write_snvs = partial(_write_snvs_parallel, tmp_dir=tmp_dir,
                                  grouped_paths=grouped_paths)

    try:
        with Pool(n_threads) as pool:
            vcf_fpaths = pool.map(_partial_write_snvs,
                                  _numbered_chunks(variations, chunk_size))
    except Exception:
        # Clean up only after the ``with`` block has released the pool, so
        # that no worker can still be writing into tmp_dir (assuming Pool is
        # multiprocessing.Pool, whose __exit__ terminates the workers).
        # NOTE(review): the '.vcf.h5' suffix looks unusual for VCF chunks --
        # confirm it matches the files _write_snvs_parallel creates.
        remove_temp_file_in_dir(tmp_dir, '.vcf.h5')
        raise

    try:
        _merge_vcfs(vcf_fpaths, out_fhand)
    finally:
        # Temporary chunks are removed on success and on failure alike.
        for vcf_fpath in vcf_fpaths:
            try:
                os.remove(vcf_fpath)
            except FileNotFoundError:
                # Already gone: nothing to clean up for this chunk.
                pass
示例#3
0
def write_vcf_parallel(variations, out_fhand, n_threads, tmp_dir,
                       chunk_size=None, vcf_format=VCF_FORMAT):
    """Write ``variations`` as VCF into ``out_fhand`` using a worker pool.

    The meta section and the header are written first. The numbered chunks
    produced by ``_numbered_chunks`` are then mapped over
    ``_write_snvs_parallel`` in a ``Pool`` of ``n_threads`` workers; the
    paths returned by the workers are merged into ``out_fhand`` by
    ``_merge_vcfs`` and finally deleted, whether or not the merge succeeded.

    Args:
        variations: Variations object handed to the writer helpers.
        out_fhand: Output file handle that receives meta, header and the
            merged body.
        n_threads: Number of workers requested from ``Pool``.
        tmp_dir: Directory passed to the workers for their temporary files.
        chunk_size: Forwarded to ``_numbered_chunks``.
        vcf_format: Forwarded to ``_write_vcf_meta``.

    Raises:
        Exception: Whatever the helpers or the pool raise is propagated
            after the temporary files have been cleaned up.
    """
    _write_vcf_meta(variations, out_fhand, vcf_format=vcf_format)
    _write_vcf_header(variations, out_fhand)

    grouped_paths = _group_variations_paths(variations)
    _partial_write_snvs = partial(_write_snvs_parallel, tmp_dir=tmp_dir,
                                  grouped_paths=grouped_paths)

    try:
        with Pool(n_threads) as pool:
            vcf_fpaths = pool.map(_partial_write_snvs,
                                  _numbered_chunks(variations, chunk_size))
    except Exception:
        # Clean up only after the ``with`` block has released the pool, so
        # that no worker can still be writing into tmp_dir (assuming Pool is
        # multiprocessing.Pool, whose __exit__ terminates the workers).
        # NOTE(review): the '.vcf.h5' suffix looks unusual for VCF chunks --
        # confirm it matches the files _write_snvs_parallel creates.
        remove_temp_file_in_dir(tmp_dir, '.vcf.h5')
        raise

    try:
        _merge_vcfs(vcf_fpaths, out_fhand)
    finally:
        # Temporary chunks are removed on success and on failure alike.
        for vcf_fpath in vcf_fpaths:
            try:
                os.remove(vcf_fpath)
            except FileNotFoundError:
                # Already gone: nothing to clean up for this chunk.
                pass
示例#4
0
def vcf_to_h5(vcf_fpath, out_h5_fpath, n_threads, tmp_dir, kept_fields=None,
              ignored_fields=None):
    """Parse a VCF per chrom in a worker pool and merge the parts into one h5.

    The chroms returned by ``get_chroms_in_vcf`` are mapped over
    ``_parse_vcf`` in a ``Pool`` of ``n_threads`` workers. The paths returned
    by the workers are merged into ``out_h5_fpath`` by ``_merge_h5`` and are
    then handed to ``_remove_temp_chrom_h5s`` whether or not the merge
    succeeded.

    Args:
        vcf_fpath: Path of the input VCF; forwarded to the helpers.
        out_h5_fpath: Destination path given to ``_merge_h5``.
        n_threads: Number of workers requested from ``Pool``.
        tmp_dir: Directory for the temporary files; created when missing.
        kept_fields: Forwarded unchanged to ``_parse_vcf``.
        ignored_fields: Forwarded unchanged to ``_parse_vcf``.

    Raises:
        Exception: Whatever the helpers or the pool raise is propagated
            after the temporary files have been cleaned up.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir() and
    # makedirs also creates missing parent directories.
    os.makedirs(tmp_dir, exist_ok=True)

    chroms = get_chroms_in_vcf(vcf_fpath)

    partial_parse_vcf = partial(_parse_vcf, vcf_fpath=vcf_fpath,
                                tmp_dir=tmp_dir,
                                kept_fields=kept_fields,
                                ignored_fields=ignored_fields)
    try:
        with Pool(n_threads) as pool:
            h5_chroms_fpaths = pool.map(partial_parse_vcf, chroms)
    except Exception:
        # Clean up only after the ``with`` block has released the pool, so
        # that no worker can still be writing into tmp_dir (assuming Pool is
        # multiprocessing.Pool, whose __exit__ terminates the workers).
        remove_temp_file_in_dir(tmp_dir, '.tmp.h5')
        raise

    try:
        _merge_h5(h5_chroms_fpaths, out_h5_fpath)
    finally:
        # The per-chrom files are temporary: drop them on success and on
        # failure alike; any merge error still propagates.
        _remove_temp_chrom_h5s(h5_chroms_fpaths)