def h(paths, sample_names, tmp_path, json, header, out_path, i, first):
    """Run the inner part of stage one on one batch of gVCFs.

    Each file in ``paths`` is imported with ``hl.import_gvcfs``, converted
    into the combiner's format by ``comb.transform_one`` and then merged in
    groups of at most ``MAX_COMBINE_NUMBER`` by ``comb.combine_gvcfs``.

    ``json`` is forwarded as the second positional argument of
    ``hl.import_gvcfs``; ``header`` and ``sample_names`` are forwarded as
    ``_external_header`` and ``_external_sample_ids``.

    Returns:
        ``[]`` when ``first`` is truthy and ``len(paths)`` does not exceed
        ``MAX_COMBINE_NUMBER`` -- in that case the first merged table is
        written directly to ``out_path``. Otherwise, the paths of the
        temporary matrix tables written under ``tmp_path + f'{i}/'``.
    """
    imported = hl.import_gvcfs(
        paths,
        json,
        array_elements_required=False,
        _external_header=header,
        _external_sample_ids=sample_names,
    )
    transformed = [comb.transform_one(gvcf) for gvcf in imported]

    merged = []
    for group in chunks(transformed, MAX_COMBINE_NUMBER):
        merged.append(comb.combine_gvcfs(group))

    if first and len(paths) <= MAX_COMBINE_NUMBER:
        # The batch fits in a single group, so its one merged table is the
        # final result. This shortcut is skipped when `first` is falsy
        # (presumably because other items were already written -- confirm
        # against the caller).
        merged[0].write(out_path, overwrite=True)
        return []

    batch_dir = tmp_path + f'{i}/'
    width = len(str(len(merged)))
    hl.experimental.write_matrix_tables(merged, batch_dir, overwrite=True)
    # NOTE(review): assumes write_matrix_tables names its outputs with a
    # zero-padded index followed by '.mt' -- confirm against Hail.
    return [batch_dir + str(idx).zfill(width) + '.mt' for idx in range(len(merged))]
def h(paths, sample_names, tmp_path, json, header, out_path, i):
    """Import, transform and merge one batch of VCFs.

    Each file in ``paths`` is imported with ``hl.import_vcfs``, passed
    through ``comb.transform_one`` and then merged in groups of at most
    ``MAX_COMBINE_NUMBER`` by ``comb.combine_gvcfs``.

    ``json`` is forwarded as the second positional argument of
    ``hl.import_vcfs``; ``header`` and ``sample_names`` are forwarded as
    ``_external_header`` and ``_external_sample_ids``.

    Returns:
        ``[]`` when ``len(paths)`` does not exceed ``MAX_COMBINE_NUMBER``
        -- the first merged table is then written directly to
        ``out_path``. Otherwise, the paths of the temporary matrix tables
        written under ``tmp_path + f'{i}/'``.
    """
    imported = hl.import_vcfs(
        paths,
        json,
        array_elements_required=False,
        _external_header=header,
        _external_sample_ids=sample_names,
    )
    transformed = [comb.transform_one(source) for source in imported]

    merged = []
    for group in chunks(transformed, MAX_COMBINE_NUMBER):
        merged.append(comb.combine_gvcfs(group))

    if len(paths) <= MAX_COMBINE_NUMBER:
        # Only one merged table exists, so write it as the final output.
        merged[0].write(out_path, overwrite=True)
        return []

    batch_dir = tmp_path + f'{i}/'
    width = len(str(len(merged)))
    hl.experimental.write_matrix_tables(merged, batch_dir, overwrite=True)
    # NOTE(review): assumes write_matrix_tables names its outputs with a
    # zero-padded index followed by '.mt' -- confirm against Hail.
    return [batch_dir + str(idx).zfill(width) + '.mt' for idx in range(len(merged))]
def run_combiner(sample_list, intervals, out_path, tmp_path, summary_path=None, overwrite=False):
    """Combine the VCFs in ``sample_list`` into one matrix table at ``out_path``.

    The inputs are imported with ``hl.import_vcfs`` (``intervals`` is
    forwarded as its second positional argument), transformed with
    ``comb.transform_one`` and merged in groups of at most
    ``MAX_COMBINER_LENGTH``. While more than one merged table remains, the
    tables are written to a numbered sub-directory of a fresh temporary
    directory, read back and merged again.

    Args:
        sample_list: inputs handed to ``hl.import_vcfs``.
        intervals: second positional argument of ``hl.import_vcfs``.
        out_path: destination of the final matrix table.
        tmp_path: base location; ``/combiner-temporary/<uuid4>/`` is
            appended to it for intermediate output.
        summary_path: when not ``None``, the rows of
            ``comb.summarize`` applied to the final table are written here.
        overwrite: forwarded to the final write and to the summary write.
    """
    import gc

    # Appending a path segment makes the temporary location a directory,
    # whatever the caller passed in.
    tmp_path += f'/combiner-temporary/{uuid.uuid4()}/'

    imported = hl.import_vcfs(sample_list, intervals, array_elements_required=False)
    transformed = [comb.transform_one(source) for source in imported]
    combined = [comb.combine_gvcfs(group) for group in chunks(transformed, MAX_COMBINER_LENGTH)]

    if len(combined) != 1:
        hl.utils.java.info(f'Writing combiner temporary files to: {tmp_path}')
        round_index = 0
        while len(combined) > 1:
            round_dir = tmp_path + f'{round_index}/'
            width = len(str(len(combined)))
            hl.experimental.write_matrix_tables(combined, round_dir, overwrite=True)
            # NOTE(review): assumes write_matrix_tables names its outputs
            # with a zero-padded index followed by '.mt' -- confirm.
            written = [round_dir + str(n).zfill(width) + '.mt' for n in range(len(combined))]
            round_index += 1
            reloaded = [hl.read_matrix_table(location) for location in written]
            combined = [comb.combine_gvcfs(group) for group in chunks(reloaded, MAX_COMBINER_LENGTH)]
            # Try to free memory on the master between rounds.
            gc.collect()

    # Either there was a single table from the start or the rounds above
    # reduced the list to one; that table is the final result.
    combined[0].write(out_path, overwrite=overwrite)

    if summary_path is None:
        return
    final_table = hl.read_matrix_table(out_path)
    summary_rows = comb.summarize(final_table).rows()
    summary_rows.write(summary_path, overwrite=overwrite)
def run_combiner(sample_list, json, out_path, tmp_path, summary_path=None, overwrite=False):
    """Combine the VCFs in ``sample_list`` into one matrix table at ``out_path``.

    The inputs are imported with ``hl.import_vcfs`` (``json`` is forwarded
    as its second positional argument), transformed with
    ``comb.transform_one`` and merged in groups of at most
    ``MAX_COMBINER_LENGTH``. While more than one merged table remains, the
    tables are written to a numbered sub-directory of a fresh temporary
    directory, read back and merged again.

    Args:
        sample_list: inputs handed to ``hl.import_vcfs``.
        json: second positional argument of ``hl.import_vcfs``.
        out_path: destination of the final matrix table.
        tmp_path: base location; ``/combiner-temporary/<uuid4>/`` is
            appended to it for intermediate output.
        summary_path: when not ``None``, the rows of
            ``comb.summarize`` applied to the final table are written here.
        overwrite: forwarded to the final write and to the summary write.
    """
    import gc

    # Appending a path segment makes the temporary location a directory,
    # whatever the caller passed in.
    tmp_path += f'/combiner-temporary/{uuid.uuid4()}/'

    imported = hl.import_vcfs(sample_list, json, array_elements_required=False)
    transformed = [comb.transform_one(source) for source in imported]
    combined = [comb.combine_gvcfs(group) for group in chunks(transformed, MAX_COMBINER_LENGTH)]

    if len(combined) != 1:
        hl.utils.java.info(f'Writing combiner temporary files to: {tmp_path}')
        round_index = 0
        while len(combined) > 1:
            round_dir = tmp_path + f'{round_index}/'
            width = len(str(len(combined)))
            hl.experimental.write_matrix_tables(combined, round_dir, overwrite=True)
            # NOTE(review): assumes write_matrix_tables names its outputs
            # with a zero-padded index followed by '.mt' -- confirm.
            written = [round_dir + str(n).zfill(width) + '.mt' for n in range(len(combined))]
            round_index += 1
            reloaded = [hl.read_matrix_table(location) for location in written]
            combined = [comb.combine_gvcfs(group) for group in chunks(reloaded, MAX_COMBINER_LENGTH)]
            # Try to free memory on the master between rounds.
            gc.collect()

    # Either there was a single table from the start or the rounds above
    # reduced the list to one; that table is the final result.
    combined[0].write(out_path, overwrite=overwrite)

    if summary_path is None:
        return
    final_table = hl.read_matrix_table(out_path)
    summary_rows = comb.summarize(final_table).rows()
    summary_rows.write(summary_path, overwrite=overwrite)