def extract_rna_info(chrom_info_file, raw_allelic_counts_dir, genotype_dir,
                     time_step, target_regions_dir):
    """Combine the per-individual count files of one time step.

    For every individual listed in
    ``target_regions_dir + 'rna_seq_samples_<time_step>.txt'`` the matching
    ``CountFiles`` object is opened and handed to
    ``CombinedFiles.add_counts``.

    Args:
        chrom_info_file: passed to ``chromosome.get_all_chromosomes``.
        raw_allelic_counts_dir: passed to ``CombinedFiles`` and
            ``CountFiles``.
        genotype_dir: string prefix for the genotype files.  File names are
            appended with plain ``+`` concatenation, so any path separator
            must already be part of this value.
        time_step: converted with ``str()`` and used in the sample-list file
            name and in each sample identifier (``<individual>_<time_step>``).
        target_regions_dir: string prefix for the sample-list file (plain
            concatenation as well).

    Returns:
        The ``CombinedFiles`` object.  It is not closed by this function.
    """
    # make dictionary of identifier => index mapping
    all_genotype_samples_file = genotype_dir + 'all_genotyped_samples.txt'
    samp_idx = get_samples_index(all_genotype_samples_file)

    # Initialize chromosome objects
    chrom_list = chromosome.get_all_chromosomes(chrom_info_file)

    # NOTE(review): snp_files is never closed in this function.  It is left
    # open because it is not visible from here whether combined_files keeps
    # using it after add_counts() returns -- confirm and close if safe.
    snp_files = SNPFiles(genotype_dir + 'snp_tab.h5',
                         genotype_dir + 'snp_index.h5',
                         genotype_dir + 'haps.h5')

    # STEP 1: make combined HDF5 files of AS counts,
    # total mapped read counts, and genotype counts
    individuals = get_individual_array(
        target_regions_dir + 'rna_seq_samples_' + str(time_step) + '.txt')
    combined_files = CombinedFiles(raw_allelic_counts_dir, chrom_list,
                                   time_step)

    for ind in individuals:
        print(ind)
        sample_id = ind + '_' + str(time_step)
        count_files = CountFiles(raw_allelic_counts_dir, sample_id)
        try:
            ind_idx = samp_idx[ind]
            combined_files.add_counts(chrom_list, count_files, snp_files,
                                      ind_idx)
        finally:
            # release this individual's files even if the lookup or the
            # summation fails
            count_files.close()

    return combined_files
def main():
    """Sum counts across individuals and write the list of target regions.

    Output goes to ``args.output_file`` (gzip-compressed when the name ends
    in ".gz") or to stdout when no output file is given.  Count files are
    opened per individual from ``args.read_count_dir`` and added to a
    ``CombinedFiles`` object, which ``write_target_regions`` then uses
    together with the SNP files.
    """
    sys.stderr.write("cmd: %s\n" % " ".join(sys.argv))

    args = parse_args()

    if args.output_file:
        if args.output_file.endswith(".gz"):
            out_f = gzip.open(args.output_file, "wt")
        else:
            out_f = open(args.output_file, "wt")
    else:
        out_f = sys.stdout

    combined_files = None
    snp_files = None
    try:
        # make dictionary of identifier => index mapping
        samp_idx = get_samples_index(args)

        # read individuals
        individuals = read_individuals(args, samp_idx)

        chrom_list = chromosome.get_all_chromosomes(args.chrom)

        combined_files = CombinedFiles(OUTPUT_DIR, chrom_list)
        snp_files = SNPFiles(args)

        # STEP 1: make combined HDF5 files of AS counts,
        # total mapped read counts, and genotype counts
        sys.stderr.write("summing genotypes and read counts across "
                         "individuals\n")
        for ind in individuals:
            # open count files for this individual
            sys.stderr.write("individual: %s\n" % ind)
            count_files = CountFiles(args.read_count_dir, ind)
            try:
                ind_idx = samp_idx[ind]
                # add counts to combined totals
                combined_files.add_counts(chrom_list, count_files,
                                          snp_files, ind_idx)
            finally:
                count_files.close()

        # STEP 2: generate list of target regions centered on test SNPs:
        sys.stderr.write("generating list of target regions\n")
        write_target_regions(out_f, args, chrom_list, combined_files,
                             snp_files)
    finally:
        if combined_files is not None:
            combined_files.close()
        if snp_files is not None:
            snp_files.close()
        # Close (and thereby flush) the output file; stdout is left open
        # because this function does not own it.
        if out_f is not sys.stdout:
            out_f.close()
def main():
    """Accumulate per-position read counts from BAM files into HDF5 files.

    For every chromosome that has a node in the SNP table, each read of each
    BAM file in ``args.bam_filenames`` is passed to ``add_read_count``
    together with four in-memory count arrays (ref, alt, other, total).  The
    arrays are then copied into the chromosome's arrays in the four output
    HDF5 files, and ``chromstat.set_stats`` is called on every output file
    before it is closed.

    Raises:
        NotImplementedError: if ``args.data_type`` is neither "uint8" nor
            "uint16".
    """
    args = parse_args()

    # NOTE(review): openFile/getNode are the camelCase PyTables names that
    # PyTables 3.0 renamed to open_file/get_node.  They are kept because the
    # helpers this function relies on are not visible here -- confirm the
    # targeted PyTables version before modernizing.
    snp_tab_h5 = tables.openFile(args.snp_tab, "r")
    snp_index_h5 = tables.openFile(args.snp_index, "r")

    if args.haplotype:
        hap_h5 = tables.openFile(args.haplotype, "r")
        ind_idx = lookup_individual_index(args.samples, args.individual)
    else:
        hap_h5 = None
        ind_idx = None

    ref_count_h5 = tables.openFile(args.ref_as_counts, "w")
    alt_count_h5 = tables.openFile(args.alt_as_counts, "w")
    other_count_h5 = tables.openFile(args.other_as_counts, "w")
    read_count_h5 = tables.openFile(args.read_counts, "w")

    output_h5 = [ref_count_h5, alt_count_h5, other_count_h5, read_count_h5]

    # initialize every chromosome in output files
    chrom_list = chromosome.get_all_chromosomes(args.chrom)
    for chrom in chrom_list:
        for out_file in output_h5:
            create_carray(out_file, chrom, args.data_type)

    if args.data_type == "uint8":
        max_count = MAX_UINT8_COUNT
        dtype = np.uint8
    elif args.data_type == "uint16":
        max_count = MAX_UINT16_COUNT
        dtype = np.uint16
    else:
        raise NotImplementedError("unsupported datatype %s" % args.data_type)

    # progress counter: one "." is printed for every 10000 reads
    count = 0

    for chrom in chrom_list:
        sys.stderr.write("%s\n" % chrom.name)

        warned_pos = {}

        # fetch SNP info for this chromosome
        if chrom.name not in snp_tab_h5.root:
            # no SNPs for this chromosome
            continue

        sys.stderr.write("fetching SNPs\n")

        snp_tab = snp_tab_h5.getNode("/%s" % chrom.name)
        snp_index_array = snp_index_h5.getNode("/%s" % chrom.name)[:]
        if hap_h5:
            hap_tab = hap_h5.getNode("/%s" % chrom.name)
        else:
            hap_tab = None

        # initialize count arrays for this chromosome to 0
        ref_carray = get_carray(ref_count_h5, chrom)
        alt_carray = get_carray(alt_count_h5, chrom)
        other_carray = get_carray(other_count_h5, chrom)
        read_count_carray = get_carray(read_count_h5, chrom)

        ref_array = np.zeros(chrom.length, dtype)
        alt_array = np.zeros(chrom.length, dtype)
        other_array = np.zeros(chrom.length, dtype)
        read_count_array = np.zeros(chrom.length, dtype)

        # loop over all BAM files, pulling out reads for this chromosome
        for bam_filename in args.bam_filenames:
            sys.stderr.write("reading from file %s\n" % bam_filename)

            samfile = pysam.Samfile(bam_filename, "rb")
            try:
                for read in get_sam_iter(samfile, chrom):
                    count += 1
                    if count == 10000:
                        sys.stderr.write(".")
                        count = 0

                    add_read_count(read, chrom, ref_array, alt_array,
                                   other_array, read_count_array,
                                   snp_index_array, snp_tab, hap_tab,
                                   warned_pos, max_count, ind_idx)
            finally:
                # every BAM handle is closed as soon as it has been read,
                # also when reading fails part-way
                samfile.close()

        # store results for this chromosome once all BAM files have been
        # accumulated into the in-memory arrays
        ref_carray[:] = ref_array
        alt_carray[:] = alt_array
        other_carray[:] = other_array
        read_count_carray[:] = read_count_array
        sys.stderr.write("\n")

    # set track statistics and close HDF5 files
    sys.stderr.write("setting statistics for each chromosome\n")
    for h5f in output_h5:
        chromstat.set_stats(h5f, chrom_list)
        h5f.close()

    snp_tab_h5.close()
    snp_index_h5.close()
    if hap_h5:
        hap_h5.close()

    sys.stderr.write("done\n")
def main():
    """Accumulate per-position read counts from BAM files into HDF5 files.

    For every chromosome that has a node in the SNP table (optionally only
    ``args.test_chrom``), each read of each BAM file in
    ``args.bam_filenames`` is passed to ``add_read_count`` together with
    four in-memory count arrays (ref, alt, other, total).  The arrays are
    then copied into the output HDF5 files and, when ``args.txt_counts`` is
    given, handed to ``write_txt_file`` once per chromosome.

    Raises:
        NotImplementedError: if ``args.data_type`` is neither "uint8" nor
            "uint16".
        ValueError: if the haplotype file does not yield exactly one sample
            index for ``args.individual``.
    """
    args = parse_args()

    sys.stderr.write("command line: %s\n" % " ".join(sys.argv))
    sys.stderr.write("python version: %s\n" % sys.version)
    sys.stderr.write("pysam version: %s\n" % pysam.__version__)
    sys.stderr.write("pytables version: %s\n" % tables.__version__)

    util.check_pysam_version()
    util.check_pytables_version()

    # disable warnings that come from pytables when chromosome
    # names are like 1, 2, 3 (instead of chr1, chr2, chr3)
    warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)

    snp_tab_h5 = tables.open_file(args.snp_tab, "r")
    snp_index_h5 = tables.open_file(args.snp_index, "r")

    if args.haplotype:
        hap_h5 = tables.open_file(args.haplotype, "r")
    else:
        hap_h5 = None

    ref_count_h5 = tables.open_file(args.ref_as_counts, "w")
    alt_count_h5 = tables.open_file(args.alt_as_counts, "w")
    other_count_h5 = tables.open_file(args.other_as_counts, "w")
    read_count_h5 = tables.open_file(args.read_counts, "w")

    output_h5 = [ref_count_h5, alt_count_h5, other_count_h5, read_count_h5]

    # initialize every chromosome in output files
    chrom_list = chromosome.get_all_chromosomes(args.chrom)
    for chrom in chrom_list:
        for out_file in output_h5:
            create_carray(out_file, chrom, args.data_type)

    if args.data_type == "uint8":
        max_count = MAX_UINT8_COUNT
        dtype = np.uint8
    elif args.data_type == "uint16":
        max_count = MAX_UINT16_COUNT
        dtype = np.uint16
    else:
        raise NotImplementedError("unsupported datatype %s" % args.data_type)

    # create a txt file to also hold the counts
    txt_counts = None
    if args.txt_counts is not None:
        if os.path.splitext(args.txt_counts)[1] == ".gz":
            txt_counts = gzip.open(args.txt_counts, 'wt+')
        else:
            txt_counts = open(args.txt_counts, 'w+')

    # progress counter: one "." is printed for every 10000 reads
    count = 0

    try:
        for chrom in chrom_list:
            sys.stderr.write("%s\n" % chrom.name)

            if args.test_chrom:
                if chrom.name != args.test_chrom:
                    sys.stderr.write("skipping because not test chrom\n")
                    continue

            warned_pos = {}

            # fetch SNP info for this chromosome
            if chrom.name not in snp_tab_h5.root:
                # no SNPs for this chromosome
                sys.stderr.write("skipping %s because chromosome with this "
                                 "name not found in SNP table\n" % chrom.name)
                continue

            sys.stderr.write("fetching SNPs\n")

            snp_tab = snp_tab_h5.get_node("/%s" % chrom.name)
            snp_index_array = snp_index_h5.get_node("/%s" % chrom.name)[:]

            if hap_h5:
                hap_tab = hap_h5.get_node("/%s" % chrom.name)
                _, ind_idx = snptable.SNPTable().get_h5_sample_indices(
                    hap_h5, chrom, [args.individual])

                if len(ind_idx) != 1:
                    raise ValueError("got sample indices for %d individuals, "
                                     "but expected to get index for one "
                                     "individual (%s)" % (len(ind_idx),
                                                          args.individual))
                ind_idx = ind_idx[0]
                sys.stderr.write("index for individual %s is %d\n" %
                                 (args.individual, ind_idx))
            else:
                hap_tab = None
                ind_idx = None

            # initialize count arrays for this chromosome to 0
            ref_carray = get_carray(ref_count_h5, chrom)
            alt_carray = get_carray(alt_count_h5, chrom)
            other_carray = get_carray(other_count_h5, chrom)
            read_count_carray = get_carray(read_count_h5, chrom)

            ref_array = np.zeros(chrom.length, dtype)
            alt_array = np.zeros(chrom.length, dtype)
            other_array = np.zeros(chrom.length, dtype)
            read_count_array = np.zeros(chrom.length, dtype)

            # loop over all BAM files, pulling out reads
            # for this chromosome
            for bam_filename in args.bam_filenames:
                sys.stderr.write("reading from file %s\n" % bam_filename)

                samfile = pysam.Samfile(bam_filename, "rb")
                try:
                    for read in get_sam_iter(samfile, chrom):
                        count += 1
                        if count == 10000:
                            sys.stderr.write(".")
                            count = 0

                        add_read_count(read, chrom, ref_array, alt_array,
                                       other_array, read_count_array,
                                       snp_index_array, snp_tab, hap_tab,
                                       warned_pos, max_count, ind_idx)
                finally:
                    # every BAM handle is closed as soon as it has been
                    # read, also when reading fails part-way
                    samfile.close()

            # store results for this chromosome once all BAM files have
            # been accumulated into the in-memory arrays
            ref_carray[:] = ref_array
            alt_carray[:] = alt_array
            other_carray[:] = other_array
            read_count_carray[:] = read_count_array
            sys.stderr.write("\n")

            # Write the rows for this chromosome exactly once, after the
            # counts of all BAM files are complete, so that several input
            # BAM files do not produce repeated rows.
            if txt_counts is not None:
                write_txt_file(txt_counts, chrom, snp_tab, hap_tab, ind_idx,
                               ref_array, alt_array, other_array)
    finally:
        # close the open txt file handle, also on failure
        if txt_counts is not None:
            txt_counts.close()

    # check if any of the reads contained an unimplemented CIGAR
    if unimplemented_CIGAR[0] > 0:
        sys.stderr.write("WARNING: Encountered "
                         + str(unimplemented_CIGAR[0])
                         + " instances of CIGAR codes: "
                         + str(unimplemented_CIGAR[1])
                         + ". Reads with these "
                         "CIGAR codes were skipped because they "
                         "are currently unimplemented.\n")

    # set track statistics and close HDF5 files
    sys.stderr.write("setting statistics for each chromosome\n")
    for h5f in output_h5:
        chromstat.set_stats(h5f, chrom_list)
        h5f.close()

    snp_tab_h5.close()
    snp_index_h5.close()
    if hap_h5:
        hap_h5.close()

    sys.stderr.write("done\n")
def main():
    """Write one output line per SNP line of ``args.input_file``.

    Lines starting with "#" are skipped.  A line whose second field is "NA"
    produces an NA line.  For every other line the target regions and the
    test SNP are looked up, counts are set on the linked heterozygous SNPs
    when the test SNP was found, and the result is written to stdout.

    The input file is read as text; a name ending in ".gz" is opened through
    ``gzip``.

    Raises:
        KeyError: if a line names a chromosome that is not in the dictionary
            returned by ``chromosome.get_chromosome_dict``.
    """
    args = parse_args()

    write_header(sys.stdout)

    # find index of individual in list of samples
    ind_idx = lookup_individual_index(args, args.individual)

    data_files = DataFiles(args)
    try:
        chrom_list = chromosome.get_all_chromosomes(args.chrom)
        chrom_dict = chromosome.get_chromosome_dict(args.chrom)

        genomewide_read_counts = get_genomewide_count(
            data_files.read_count_h5, chrom_list)

        if args.target_region_size:
            sys.stderr.write("setting target region size to %d\n" %
                             args.target_region_size)

        # Text mode is requested explicitly: without a mode gzip.open()
        # yields bytes on Python 3 and the string comparisons below fail.
        if args.input_file.endswith(".gz"):
            f = gzip.open(args.input_file, "rt")
        else:
            f = open(args.input_file, "r")

        with f:
            for line_count, line in enumerate(f, 1):
                if line_count % 1000 == 0:
                    sys.stderr.write(".")

                if line.startswith("#"):
                    continue

                words = line.rstrip().split()

                if words[1] == "NA":
                    # no SNP defined on this line:
                    write_NA_line(sys.stdout)
                    continue

                chrom_name = words[0]
                chrom = chrom_dict[chrom_name]

                region_list = get_target_regions(args, chrom, words)

                snp_pos = int(words[1])
                snp_ref_base = words[3]
                snp_alt_base = words[4]
                # TODO: check that SNP ref/alt match?

                snp_region = coord.Coord(chrom, snp_pos, snp_pos)

                # pull out all of the SNPs in the target region(s)
                region_snps = get_region_snps(data_files, region_list,
                                              ind_idx)

                # pull out test SNP
                test_snp_list = get_region_snps(data_files, [snp_region],
                                                ind_idx)
                if len(test_snp_list) != 1:
                    test_snp = None
                    sys.stderr.write("WARNING: could not find test SNP at "
                                     "position %s:%d\n" %
                                     (chrom.name, snp_pos))
                    het_snps = []
                else:
                    test_snp = test_snp_list[0]

                    # pull out haplotype counts from linked heterozygous SNPs
                    het_snps = get_het_snps(region_snps)
                    set_snp_counts(data_files, region_list, het_snps,
                                   test_snp, args)

                region_read_counts = get_region_read_counts(data_files,
                                                            region_list)

                write_output(sys.stdout, region_list, het_snps, test_snp,
                             snp_pos, region_read_counts,
                             genomewide_read_counts)

        sys.stderr.write("\n")
    finally:
        data_files.close()
def main():
    """Accumulate per-position read counts from BAM files into HDF5 files.

    For every chromosome that has a node in the SNP table, each read of each
    BAM file in ``args.bam_filenames`` is passed to ``add_read_count``
    together with four in-memory count arrays (ref, alt, other, total).  The
    arrays are then copied into the output HDF5 files.  When
    ``args.text_counts`` is given, one table of rows per chromosome is
    collected in memory and written with ``np.savetxt`` at the end.

    Raises:
        NotImplementedError: if ``args.data_type`` is neither "uint8" nor
            "uint16".
    """
    args = parse_args()

    sys.stderr.write("command line: %s\n" % " ".join(sys.argv))
    sys.stderr.write("python version: %s\n" % sys.version)
    sys.stderr.write("pysam version: %s\n" % pysam.__version__)
    sys.stderr.write("pytables version: %s\n" % tables.__version__)

    util.check_pysam_version()
    util.check_pytables_version()

    snp_tab_h5 = tables.open_file(args.snp_tab, "r")
    snp_index_h5 = tables.open_file(args.snp_index, "r")

    if args.haplotype:
        hap_h5 = tables.open_file(args.haplotype, "r")
        ind_idx = lookup_individual_index(args.samples, args.individual)
    else:
        hap_h5 = None
        ind_idx = None

    ref_count_h5 = tables.open_file(args.ref_as_counts, "w")
    alt_count_h5 = tables.open_file(args.alt_as_counts, "w")
    other_count_h5 = tables.open_file(args.other_as_counts, "w")
    read_count_h5 = tables.open_file(args.read_counts, "w")

    output_h5 = [ref_count_h5, alt_count_h5, other_count_h5, read_count_h5]

    # initialize every chromosome in output files
    chrom_list = chromosome.get_all_chromosomes(args.chrom)
    for chrom in chrom_list:
        for out_file in output_h5:
            create_carray(out_file, chrom, args.data_type)

    if args.data_type == "uint8":
        max_count = MAX_UINT8_COUNT
        dtype = np.uint8
    elif args.data_type == "uint16":
        max_count = MAX_UINT16_COUNT
        dtype = np.uint16
    else:
        raise NotImplementedError("unsupported datatype %s" % args.data_type)

    # list of per-chromosome row tables that are written to a txt file
    # at the end (only filled when a text output was requested)
    txt_counts = []

    # progress counter: one "." is printed for every 10000 reads
    count = 0

    for chrom in chrom_list:
        sys.stderr.write("%s\n" % chrom.name)

        warned_pos = {}

        # fetch SNP info for this chromosome
        if chrom.name not in snp_tab_h5.root:
            # no SNPs for this chromosome
            continue

        sys.stderr.write("fetching SNPs\n")

        snp_tab = snp_tab_h5.get_node("/%s" % chrom.name)
        snp_index_array = snp_index_h5.get_node("/%s" % chrom.name)[:]
        if hap_h5:
            hap_tab = hap_h5.get_node("/%s" % chrom.name)
        else:
            hap_tab = None

        # initialize count arrays for this chromosome to 0
        ref_carray = get_carray(ref_count_h5, chrom)
        alt_carray = get_carray(alt_count_h5, chrom)
        other_carray = get_carray(other_count_h5, chrom)
        read_count_carray = get_carray(read_count_h5, chrom)

        ref_array = np.zeros(chrom.length, dtype)
        alt_array = np.zeros(chrom.length, dtype)
        other_array = np.zeros(chrom.length, dtype)
        read_count_array = np.zeros(chrom.length, dtype)

        # loop over all BAM files, pulling out reads
        # for this chromosome
        for bam_filename in args.bam_filenames:
            sys.stderr.write("reading from file %s\n" % bam_filename)

            samfile = pysam.Samfile(bam_filename, "rb")
            try:
                for read in get_sam_iter(samfile, chrom):
                    count += 1
                    if count == 10000:
                        sys.stderr.write(".")
                        count = 0

                    add_read_count(read, chrom, ref_array, alt_array,
                                   other_array, read_count_array,
                                   snp_index_array, snp_tab, hap_tab,
                                   warned_pos, max_count, ind_idx)
            finally:
                # every BAM handle is closed as soon as it has been read,
                # also when reading fails part-way
                samfile.close()

        # store results for this chromosome once all BAM files have been
        # accumulated into the in-memory arrays
        ref_carray[:] = ref_array
        alt_carray[:] = alt_array
        other_carray[:] = other_array
        read_count_carray[:] = read_count_array
        sys.stderr.write("\n")

        # collect the rows for the txt file; columns are:
        # chrom, pos, ref, alt, genotype, ref_count, alt_count, other_count
        if args.text_counts is not None:
            # A separate name is used for the column of chromosome names so
            # that the loop's chromosome object is not replaced by an array.
            chrom_col = np.tile(chrom.name, len(snp_tab))
            pos = np.array([snp['pos'] for snp in snp_tab])
            ref = np.array([snp['allele1'] for snp in snp_tab])
            alt = np.array([snp['allele2'] for snp in snp_tab])
            if hap_tab is not None:
                # NOTE(review): this always reads the first two entries of
                # each haplotype row and does not use ind_idx -- confirm
                # that these belong to the requested individual.
                genotype = np.array([str(hap[0]) + "|" + str(hap[1])
                                     for hap in hap_tab])
            else:
                # zero-width column: contributes nothing to column_stack
                genotype = np.empty((len(snp_tab), 0))
            # pos - 1 is used as the index into the count arrays
            txt_counts.append(
                np.column_stack((chrom_col, pos, ref, alt, genotype,
                                 ref_array[pos - 1],
                                 alt_array[pos - 1],
                                 other_array[pos - 1]))
            )

    # write the txt_counts np arrays to a txt file
    if args.text_counts is not None:
        if txt_counts:
            # vstack combines the per-chromosome tables row-wise
            rows = np.vstack(txt_counts)
        else:
            # No chromosome had SNPs: vstack would raise on an empty list,
            # so an empty table (and thus an empty file) is written instead.
            rows = np.empty((0, 0))
        np.savetxt(args.text_counts, rows, fmt="%1s", delimiter=" ")

    # set track statistics and close HDF5 files
    sys.stderr.write("setting statistics for each chromosome\n")
    for h5f in output_h5:
        chromstat.set_stats(h5f, chrom_list)
        h5f.close()

    snp_tab_h5.close()
    snp_index_h5.close()
    if hap_h5:
        hap_h5.close()

    sys.stderr.write("done\n")
def main():
    """Write one output line per SNP line of ``args.input_file``.

    Lines starting with "#" are skipped.  A line whose second field is "NA"
    produces an NA line.  A line whose chromosome name cannot be resolved
    (neither as given nor, for names without a "chr" prefix, with "chr"
    prepended) is skipped; a warning is printed once per unknown name.  For
    every other line the target regions and the test SNP are looked up,
    counts are set on the linked heterozygous SNPs when the test SNP was
    found, and the result is written to stdout.
    """
    args = parse_args()

    write_header(sys.stdout)

    # find index of individual in list of samples
    ind_idx = lookup_individual_index(args, args.individual)

    data_files = DataFiles(args)
    try:
        chrom_list = chromosome.get_all_chromosomes(args.chrom)
        chrom_dict = chromosome.get_chromosome_dict(args.chrom)

        genomewide_read_counts = get_genomewide_count(
            data_files.read_count_h5, chrom_list)

        # chromosome names that have already been warned about
        unknown_chrom = set()

        if args.target_region_size:
            sys.stderr.write("setting target region size to %d\n" %
                             args.target_region_size)

        if util.is_gzipped(args.input_file):
            f = gzip.open(args.input_file, "rt")
        else:
            f = open(args.input_file, "r")

        with f:
            for line_count, line in enumerate(f, 1):
                if line_count % 1000 == 0:
                    sys.stderr.write(".")

                if line.startswith("#"):
                    continue

                words = line.rstrip().split()

                if words[1] == "NA":
                    # no SNP defined on this line:
                    write_NA_line(sys.stdout)
                    continue

                # Resolve the chromosome; every unresolved name is skipped,
                # including names that already start with "chr", so that a
                # chromosome left over from an earlier line is never reused.
                chrom_name = words[0]
                chrom = None
                if chrom_name in chrom_dict:
                    chrom = chrom_dict[chrom_name]
                elif not chrom_name.startswith("chr"):
                    # try adding 'chr' to front of name
                    new_chrom_name = "chr" + chrom_name
                    if new_chrom_name in chrom_dict:
                        chrom = chrom_dict[new_chrom_name]

                if chrom is None:
                    # can't figure out this chromosome name
                    if chrom_name not in unknown_chrom:
                        unknown_chrom.add(chrom_name)
                        sys.stderr.write("WARNING: unknown chromosome "
                                         "'%s'\n" % chrom_name)
                    continue

                region_list = get_target_regions(args, chrom, words)

                snp_pos = int(words[1])
                snp_ref_base = words[3]
                snp_alt_base = words[4]
                # TODO: check that SNP ref/alt match?

                snp_region = coord.Coord(chrom, snp_pos, snp_pos)

                # pull out all of the SNPs in the target region(s)
                region_snps = get_region_snps(data_files, region_list,
                                              ind_idx)

                # pull out test SNP
                test_snp_list = get_region_snps(data_files, [snp_region],
                                                ind_idx)
                if len(test_snp_list) != 1:
                    test_snp = None
                    sys.stderr.write("WARNING: could not find test SNP at "
                                     "position %s:%d\n" %
                                     (chrom.name, snp_pos))
                    het_snps = []
                else:
                    test_snp = test_snp_list[0]

                    # pull out haplotype counts from linked heterozygous SNPs
                    het_snps = get_het_snps(region_snps)
                    set_snp_counts(data_files, region_list, het_snps,
                                   test_snp, args)

                region_read_counts = get_region_read_counts(data_files,
                                                            region_list)

                write_output(sys.stdout, region_list, het_snps, test_snp,
                             snp_pos, region_read_counts,
                             genomewide_read_counts)

        sys.stderr.write("\n")
    finally:
        data_files.close()
def main():
    """Accumulate per-position read counts from BAM files into HDF5 files.

    For every chromosome that has a node in the SNP table, each read of each
    BAM file in ``args.bam_filenames`` is passed to ``add_read_count``
    together with four in-memory count arrays (ref, alt, other, total).  The
    arrays are then copied into the output HDF5 files.  When
    ``args.txt_counts`` is given, the rows of each chromosome are also
    written with ``np.savetxt`` to that file (gzip-compressed when the name
    ends in ".gz").  The text file is created anew on every run, like the
    HDF5 outputs.

    Raises:
        NotImplementedError: if ``args.data_type`` is neither "uint8" nor
            "uint16".
    """
    args = parse_args()

    sys.stderr.write("command line: %s\n" % " ".join(sys.argv))
    sys.stderr.write("python version: %s\n" % sys.version)
    sys.stderr.write("pysam version: %s\n" % pysam.__version__)
    sys.stderr.write("pytables version: %s\n" % tables.__version__)

    util.check_pysam_version()
    util.check_pytables_version()

    snp_tab_h5 = tables.open_file(args.snp_tab, "r")
    snp_index_h5 = tables.open_file(args.snp_index, "r")

    if args.haplotype:
        hap_h5 = tables.open_file(args.haplotype, "r")
    else:
        hap_h5 = None

    ref_count_h5 = tables.open_file(args.ref_as_counts, "w")
    alt_count_h5 = tables.open_file(args.alt_as_counts, "w")
    other_count_h5 = tables.open_file(args.other_as_counts, "w")
    read_count_h5 = tables.open_file(args.read_counts, "w")

    output_h5 = [ref_count_h5, alt_count_h5, other_count_h5, read_count_h5]

    # initialize every chromosome in output files
    chrom_list = chromosome.get_all_chromosomes(args.chrom)
    for chrom in chrom_list:
        for out_file in output_h5:
            create_carray(out_file, chrom, args.data_type)

    if args.data_type == "uint8":
        max_count = MAX_UINT8_COUNT
        dtype = np.uint8
    elif args.data_type == "uint16":
        max_count = MAX_UINT16_COUNT
        dtype = np.uint16
    else:
        raise NotImplementedError("unsupported datatype %s" % args.data_type)

    # Create a txt file to also hold the counts.  It is opened for writing
    # (not appending) so that rows from an earlier run do not remain in the
    # file next to the freshly written ones.
    txt_counts = None
    if args.txt_counts is not None:
        if os.path.splitext(args.txt_counts)[1] == ".gz":
            txt_counts = gzip.open(args.txt_counts, 'wt')
        else:
            txt_counts = open(args.txt_counts, 'w')

    # progress counter: one "." is printed for every 10000 reads
    count = 0

    try:
        for chrom in chrom_list:
            sys.stderr.write("%s\n" % chrom.name)

            warned_pos = {}

            # fetch SNP info for this chromosome
            if chrom.name not in snp_tab_h5.root:
                # no SNPs for this chromosome
                continue

            sys.stderr.write("fetching SNPs\n")

            snp_tab = snp_tab_h5.get_node("/%s" % chrom.name)
            snp_index_array = snp_index_h5.get_node("/%s" % chrom.name)[:]

            hap_tab = None
            ind_idx = None
            if hap_h5:
                sample_indices = snptable.SNPTable().get_h5_sample_indices(
                    hap_h5, chrom, [args.individual])[1]
                # haplotypes are only used when an index was found for the
                # individual on this chromosome
                if len(sample_indices) != 0:
                    hap_tab = hap_h5.get_node("/%s" % chrom.name)
                    ind_idx = sample_indices[0]

            # initialize count arrays for this chromosome to 0
            ref_carray = get_carray(ref_count_h5, chrom)
            alt_carray = get_carray(alt_count_h5, chrom)
            other_carray = get_carray(other_count_h5, chrom)
            read_count_carray = get_carray(read_count_h5, chrom)

            ref_array = np.zeros(chrom.length, dtype)
            alt_array = np.zeros(chrom.length, dtype)
            other_array = np.zeros(chrom.length, dtype)
            read_count_array = np.zeros(chrom.length, dtype)

            # loop over all BAM files, pulling out reads
            # for this chromosome
            for bam_filename in args.bam_filenames:
                sys.stderr.write("reading from file %s\n" % bam_filename)

                samfile = pysam.Samfile(bam_filename, "rb")
                try:
                    for read in get_sam_iter(samfile, chrom):
                        count += 1
                        if count == 10000:
                            sys.stderr.write(".")
                            count = 0

                        add_read_count(read, chrom, ref_array, alt_array,
                                       other_array, read_count_array,
                                       snp_index_array, snp_tab, hap_tab,
                                       warned_pos, max_count, ind_idx)
                finally:
                    # every BAM handle is closed as soon as it has been
                    # read, also when reading fails part-way
                    samfile.close()

            # store results for this chromosome once all BAM files have
            # been accumulated into the in-memory arrays
            ref_carray[:] = ref_array
            alt_carray[:] = alt_array
            other_carray[:] = other_array
            read_count_carray[:] = read_count_array
            sys.stderr.write("\n")

            # write the rows of this chromosome to the txt file; columns are:
            # chrom, pos, ref, alt, genotype, ref_count, alt_count,
            # other_count
            if txt_counts is not None:
                # A separate name is used for the column of chromosome names
                # so that the loop's chromosome object is not replaced by an
                # array.
                chrom_col = np.tile(chrom.name, len(snp_tab))
                pos = np.array([snp['pos'] for snp in snp_tab])
                ref = np.array([snp['allele1'] for snp in snp_tab])
                alt = np.array([snp['allele2'] for snp in snp_tab])
                if hap_tab is not None:
                    # NOTE(review): this always reads the first two entries
                    # of each haplotype row and does not use ind_idx --
                    # confirm that these belong to the requested individual.
                    genotype = np.array(
                        [str(hap[0]) + "|" + str(hap[1]) for hap in hap_tab])
                else:
                    # zero-width column: contributes nothing to column_stack
                    genotype = np.empty((len(snp_tab), 0))

                # pos - 1 is used as the index into the count arrays
                np.savetxt(txt_counts,
                           np.column_stack((chrom_col, pos, ref, alt,
                                            genotype,
                                            ref_array[pos - 1],
                                            alt_array[pos - 1],
                                            other_array[pos - 1])),
                           fmt="%1s", delimiter=" ")
    finally:
        # close the open txt file handle, also on failure
        if txt_counts is not None:
            txt_counts.close()

    # check if any of the reads contained an unimplemented CIGAR; the
    # warning is only printed when at least one was seen
    if unimplemented_CIGAR[0] > 0:
        sys.stderr.write(
            "WARNING: Encountered "
            + str(unimplemented_CIGAR[0])
            + " instances of any of the following CIGAR codes: "
            + str(unimplemented_CIGAR[1])
            + ". The regions of reads with these CIGAR codes were skipped "
            "because these CIGAR codes are currently unimplemented.\n"
        )

    # set track statistics and close HDF5 files
    sys.stderr.write("setting statistics for each chromosome\n")
    for h5f in output_h5:
        chromstat.set_stats(h5f, chrom_list)
        h5f.close()

    snp_tab_h5.close()
    snp_index_h5.close()
    if hap_h5:
        hap_h5.close()

    sys.stderr.write("done\n")