def __init__(self, clr, expected, cooler_opts=None, view_df=None):
    self.clr = clr
    self.expected = expected

    # Detect the region columns of the expected table
    columns = expected.columns
    assert len(columns) > 0
    if ("region1" not in columns) or ("region2" not in columns):
        if ("chrom" in columns) or ("region" in columns):
            raise ValueError(
                "Provided expected appears to have old format, it has to comply with the format of expected v1.0"
            )
        else:
            raise ValueError(
                "Please check the expected dataframe, it has to comply with the format of expected v1.0"
            )

    # get chromosomes from cooler, if view_df not specified:
    if view_df is None:
        view_df = bioframe.make_viewframe(
            [(chrom, 0, l, chrom) for chrom, l in clr.chromsizes.items()]
        )
    else:
        # appropriate viewframe checks:
        if not bioframe.is_viewframe(view_df):
            raise ValueError("view_df is not a valid viewframe.")
        if not bioframe.is_contained(view_df, bioframe.make_viewframe(clr.chromsizes)):
            raise ValueError("view_df is out of the bounds of chromosomes in cooler.")
    self.view_df = view_df.set_index("name")

    for (name1, name2), group in self.expected.groupby(["region1", "region2"]):
        if name1 != name2:
            raise ValueError(
                "Only symmetric regions are supported, e.g. chromosomes, arms, etc"
            )
        n_diags = group.shape[0]
        region = self.view_df.loc[name1]
        lo, hi = self.clr.extent(region)
        if n_diags != (hi - lo):
            raise ValueError(
                "Region shape mismatch between expected and cooler. "
                "Are they using the same resolution?"
            )

    self.binsize = self.clr.binsize
    self.offsets = {}
    self.pad = True
    self.cooler_opts = {} if cooler_opts is None else cooler_opts
    self.cooler_opts.setdefault("sparse", True)
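
# Hedged usage sketch for the snipper constructor above. The class name
# ObsExpSnipper is inferred from the call `ObsExpSnipper(clr, expected_df, view_df=...)`
# in the pileup code further below; the file paths are hypothetical placeholders.
#
#     import cooler
#     import pandas as pd
#
#     clr = cooler.Cooler("example.cool")          # hypothetical cooler path
#     expected_df = pd.read_table("expected.tsv")  # must carry region1/region2 columns (expected v1.0)
#     snipper = ObsExpSnipper(clr, expected_df)    # view_df defaults to whole chromosomes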
def _make_cooler_view(view_df, clr):
    try:
        if not bioframe.is_viewframe(view_df, raise_errors=True):
            raise ValueError("view_df is not a valid viewframe.")
    except Exception as e:
        # AssertionError or ValueError, see
        # https://github.com/gfudenberg/bioframe/blob/main/bioframe/core/checks.py#L177
        warnings.warn(
            "view_df has to be a proper viewframe from next release",
            DeprecationWarning,
            stacklevel=2,
        )
        view_df = bioframe.make_viewframe(view_df)
    if not bioframe.is_contained(view_df, bioframe.make_viewframe(clr.chromsizes)):
        raise ValueError("View table is out of the bounds of chromosomes in cooler.")
    return view_df
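
# Illustrative sketch (not part of the library) of how _make_cooler_view treats
# its inputs: a proper viewframe passes through unchanged, while a bare list of
# regions is converted with a DeprecationWarning. `clr` is assumed to be an open
# cooler and the coordinates below are hypothetical.
#
#     import bioframe
#
#     # a viewframe built from the cooler's own chromsizes passes the checks:
#     view = _make_cooler_view(bioframe.make_viewframe(clr.chromsizes), clr)
#
#     # a plain list of (chrom, start, end) tuples is converted, with a warning:
#     view = _make_cooler_view([("chr1", 0, 10_000_000)], clr)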
def read_viewframe(
    fname,
    verify_cooler_view=None,
):
    """
    Read a BED file with regions that conform to the definition of a viewframe
    (non-overlapping, unique names, etc.).

    Parameters
    ----------
    fname : str
        Path to a BED file with regions.
    verify_cooler_view : None or viewframe
        Viewframe with entire chromosome sizes

    Returns
    -------
    view_df : pd.DataFrame
        DataFrame with the viewframe
    """
    # define chromsizes based on verify_cooler_view
    chromsizes = (
        None
        if (verify_cooler_view is None)
        else verify_cooler_view.set_index("chrom")["end"]
    )

    # read BED file assuming bed4/bed3 formats (with a name column and without):
    try:
        view_df = bioframe.read_table(fname, schema="bed4", index_col=False)
    except Exception:
        view_df = bioframe.read_table(fname, schema="bed3", index_col=False)

    # Convert view dataframe to viewframe:
    try:
        view_df = (
            bioframe.make_viewframe(view_df)
            if (verify_cooler_view is None)
            else bioframe.make_viewframe(view_df, check_bounds=chromsizes)
        )
    except ValueError as e:
        raise ValueError(
            "View table is incorrect, please, comply with the format. "
        ) from e

    # Check that the input view is contained in cooler bounds, but not vice versa
    # (because the cooler may have more regions):
    if verify_cooler_view is not None:
        if not bioframe.is_contained(view_df, verify_cooler_view):
            raise ValueError("View regions are not contained in cooler chromsizes bounds")

    return view_df
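
# Hedged usage sketch for read_viewframe: read a BED file of regions and verify
# it against the bounds of a cooler-derived view. The file name is a placeholder
# and `clr` is assumed to be an open cooler.
#
#     import bioframe
#
#     cooler_view = bioframe.make_viewframe(clr.chromsizes)
#     view_df = read_viewframe("arms.bed", verify_cooler_view=cooler_view)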
def __init__(self, clr, cooler_opts=None, view_df=None):
    # get chromosomes from bins, if view_df not specified:
    if view_df is None:
        view_df = bioframe.make_viewframe(
            [(chrom, 0, l, chrom) for chrom, l in clr.chromsizes.items()]
        )
    else:
        # appropriate viewframe checks:
        if not bioframe.is_viewframe(view_df):
            raise ValueError("view_df is not a valid viewframe.")
        if not bioframe.is_contained(view_df, bioframe.make_viewframe(clr.chromsizes)):
            raise ValueError("view_df is out of the bounds of chromosomes in cooler.")
    self.view_df = view_df.set_index("name")

    self.clr = clr
    self.binsize = self.clr.binsize
    self.offsets = {}
    self.pad = True
    self.cooler_opts = {} if cooler_opts is None else cooler_opts
    self.cooler_opts.setdefault("sparse", True)
def is_compatible_viewframe(view_df, verify_cooler, check_sorting=False, raise_errors=False):
    """
    Check if view_df is a viewframe and if it is compatible with the provided cooler.

    Parameters
    ----------
    view_df : DataFrame
        view_df DataFrame to be validated
    verify_cooler : cooler
        cooler object to use for verification
    check_sorting : bool
        Check if regions in view_df are sorted as the chromosomes in the cooler.
    raise_errors : bool
        raise exception instead of returning False

    Returns
    -------
    is_compatible_viewframe : bool
        True when view_df is compatible, False otherwise
    """
    try:
        try:
            _ = bioframe.is_viewframe(view_df, raise_errors=True)
        except Exception as error_not_viewframe:
            try:
                _ = bioframe.make_viewframe(view_df)
            except Exception as error_cannot_make_viewframe:
                # view_df is not a viewframe and cannot be easily converted
                raise ValueError(
                    "view_df is not a valid viewframe and cannot be recovered"
                ) from error_cannot_make_viewframe
            else:
                # view_df is not a viewframe, but can be converted - formatting issue ? name-column ?
                raise ValueError(
                    "view_df is not a valid viewframe, apply bioframe.make_viewframe to convert"
                ) from error_not_viewframe

        # is view_df contained inside cooler-chromosomes ?
        cooler_view = make_cooler_view(verify_cooler)
        if not bioframe.is_contained(view_df, cooler_view, raise_errors=False):
            raise ValueError("View table is out of the bounds of chromosomes in cooler.")

        # is view_df sorted by coord and chrom order as in cooler ?
        if check_sorting:
            if not bioframe.is_sorted(view_df, cooler_view, df_view_col="chrom"):
                raise ValueError(
                    "regions in the view_df must be sorted by coordinate"
                    " and chromosome order as in the verify_cooler."
                )
    except Exception as e:
        if raise_errors:
            raise ValueError("view_df is not compatible, or not a viewframe") from e
        else:
            # something went wrong: it's not a viewframe
            return False
    else:
        # no exceptions were raised: it's a compatible viewframe
        return True
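
# Hedged usage sketch for is_compatible_viewframe: validate a user-provided view
# against a cooler before using it downstream. Passing raise_errors=True surfaces
# the specific reason for incompatibility instead of returning False. The BED
# file name is a placeholder and `clr` is assumed to be an open cooler.
#
#     view_df = read_viewframe("arms.bed")
#     is_compatible_viewframe(view_df, clr, check_sorting=True, raise_errors=True)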
        feature_type = "bed"
    elif {"chrom1", "start1", "end1", "chrom2", "start2", "end2"}.issubset(
        features_df.columns
    ):
        feature_type = "bedpe"
    else:
        raise ValueError("Unknown feature_df format")

    if flank is not None:
        features_df = expand_align_features(
            features_df, flank, clr.binsize, format=feature_type
        )

    if view_df is None:
        view_df = bioframe.make_viewframe(clr.chromsizes)
    else:
        if not bioframe.is_contained(view_df, bioframe.make_viewframe(clr.chromsizes)):
            raise ValueError("view_df is out of the bounds of chromosomes in cooler.")

    features_df = assign_regions(features_df, view_df)

    # TODO: Expected checks are now implemented in the snippers,
    # maybe move them out to here when there is a neat function?
    if expected_df is None:
        snipper = CoolerSnipper(clr, view_df=view_df)
    else:
        snipper = ObsExpSnipper(clr, expected_df, view_df=view_df)

    if nproc > 1:
        pool = multiprocessing.Pool(nproc)
def cooler_cis_eig(
    clr,
    bins,
    view_df=None,
    n_eigs=3,
    phasing_track_col="GC",
    balance="weight",
    ignore_diags=None,
    bad_bins=None,
    clip_percentile=99.9,
    sort_metric=None,
    map=map,
):
    """
    Compute compartment eigenvectors for a given cooler `clr` in a number of
    symmetric intra-chromosomal regions defined in view_df (cis-regions), or
    for each chromosome.

    Note that the amplitude of compartment eigenvectors is weighted by their
    corresponding eigenvalue.

    Parameters
    ----------
    clr : cooler
        cooler object to fetch data from
    bins : DataFrame
        table of bins derived from clr with phasing track added
    view_df : iterable or DataFrame, optional
        if provided, eigenvectors are calculated for the regions of the view
        only, otherwise chromosome-wide eigenvectors are computed, for
        chromosomes specified in bins.
    n_eigs : int
        number of eigenvectors to compute
    phasing_track_col : str, optional
        name of the column in the `bins` table; if provided, eigenvectors are
        flipped to achieve a positive correlation with `bins[phasing_track_col]`.
    balance : str
        name of the column with balancing weights to be used.
    ignore_diags : int, optional
        the number of diagonals to ignore. Derived from cooler metadata
        if not specified.
    bad_bins : array-like
        a list of bins to ignore. Indexes of bins must be absolute,
        as in clr.bins()[:], as opposed to being offset by chromosome start.
        `bad_bins` will be combined with the bad bins masked by balancing.
    clip_percentile : float
        if >0 and <100, clip pixels with diagonal-normalized values higher than
        the specified percentile of matrix-wide values.
    sort_metric : str
        If provided, re-sort `eigenvecs` and `eigvals` in the order of
        decreasing correlation between phasing_track and eigenvector, using the
        specified measure of correlation. Possible values:
        'pearsonr' - sort by decreasing Pearson correlation.
        'var_explained' - sort by decreasing absolute amount of variation in
        `eigvecs` explained by `phasing_track` (i.e. R^2 * var(eigvec)).
        'MAD_explained' - sort by decreasing absolute amount of Median Absolute
        Deviation from the median of `eigvecs` explained by `phasing_track`
        (i.e. COMED(eigvec, phasing_track) * MAD(eigvec)).
        'spearmanr' - sort by decreasing Spearman correlation.
        This option is designed to report the most "biologically" informative
        eigenvectors first, and prevent eigenvector swapping caused by
        translocations. In reality, however, it sometimes shows poor
        performance and may lead to reporting of non-informative eigenvectors.
        Off by default.
    map : callable, optional
        Map functor implementation.

    Returns
    -------
    eigvals, eigvec_table -> DataFrames with eigenvalues for each region and
    a table of eigenvectors filled in the `bins` table.

    .. note:: ALWAYS check your EVs by eye. The first one occasionally does not
        reflect the compartment structure, but instead describes chromosomal
        arms or translocation blowouts. Possible mitigations: employ `view_df`
        (e.g. arms) to avoid issues with chromosomal arms, use `bad_bins` to
        ignore small translocations.
""" # get chromosomes from cooler, if view_df not specified: if view_df is None: view_df = bioframe.make_viewframe([(chrom, 0, clr.chromsizes[chrom]) for chrom in clr.chromnames]) else: # appropriate viewframe checks: if not bioframe.is_viewframe(view_df): raise ValueError("view_df is not a valid viewframe.") if not bioframe.is_contained(view_df, bioframe.make_viewframe(clr.chromsizes)): raise ValueError( "view_df is out of the bounds of chromosomes in cooler.") # make sure phasing_track_col is in bins, if phasing is requested if phasing_track_col and (phasing_track_col not in bins): raise ValueError(f'No column "{phasing_track_col}" in the bin table') # ignore diags as in cooler inless specified ignore_diags = (clr._load_attrs("bins/weight").get("ignore_diags", 2) if ignore_diags is None else ignore_diags) # prepare output table for eigen vectors eigvec_table = bins.copy() eigvec_columns = [f"E{i + 1}" for i in range(n_eigs)] for ev_col in eigvec_columns: eigvec_table[ev_col] = np.nan # prepare output table for eigenvalues eigvals_table = view_df.copy() eigval_columns = [f"eigval{i + 1}" for i in range(n_eigs)] for eval_col in eigval_columns: eigvals_table[eval_col] = np.nan def _each(region): """ perform eigen decomposition for a given region assuming safety checks are done outside of this function. Parameters ---------- region: tuple-like tuple of the form (chroms,start,end,*) Returns ------- _region, eigvals, eigvecs -> ndarrays array of eigenvalues and an array eigenvectors """ _region = region[:3] # take only (chrom, start, end) A = clr.matrix(balance=balance).fetch(_region) # filter bad_bins relevant for the _region from A if bad_bins is not None: # filter bad_bins for the _region and turn relative: lo, hi = clr.extent(_region) bad_bins_region = bad_bins[(bad_bins >= lo) & (bad_bins < hi)] bad_bins_region -= lo if len(bad_bins_region) > 0: # apply bad bins to symmetric matrix A: A[:, bad_bins_region] = np.nan A[bad_bins_region, :] = np.nan # extract phasing track relevant for the _region phasing_track = (bioframe.select(bins, _region)[phasing_track_col].values if phasing_track_col else None) eigvals, eigvecs = cis_eig( A, n_eigs=n_eigs, ignore_diags=ignore_diags, phasing_track=phasing_track, clip_percentile=clip_percentile, sort_metric=sort_metric, ) return _region, eigvals, eigvecs # eigendecompose matrix per region (can be multiprocessed) # output assumes that the order of results matches regions results = map(_each, view_df.values) # go through eigendecomposition results and fill in # output table eigvec_table and eigvals_table for _region, _eigvals, _eigvecs in results: idx = bioframe.select(eigvec_table, _region).index eigvec_table.at[idx, eigvec_columns] = _eigvecs.T idx = bioframe.select(eigvals_table, _region).index eigvals_table.at[idx, eigval_columns] = _eigvals return eigvals_table, eigvec_table
def compute_pileup(
    cool_path,
    features,
    view,
    expected,
    flank,
    features_format,
    weight_name,
    out,
    out_format,
    store_snips,
    nproc,
    ignore_diags,
    aggregate,
    force,
    verbose,
):
    """
    Perform retrieval of the snippets from .cool file.

    COOL_PATH : The paths to a .cool file with a balanced Hi-C map. Use the
    '::' syntax to specify a group path in a multicooler file.

    FEATURES_PATH : the path to a BED or BEDPE-like file that contains features
    for snipping windows. If BED, then the features are on-diagonal. If BEDPE,
    then the features can be off-diagonal (but not in trans or between
    different regions in the view).
    """
    clr = cooler.Cooler(cool_path)

    #### Read the features:
    buf, names = sniff_for_header(features)
    if features_format.lower() == "bedpe":
        default_cols = [0, 1, 2, 3, 4, 5]
        bedpe_cols = ["chrom1", "start1", "end1", "chrom2", "start2", "end2"]
        dtypes = {
            "chrom1": str,
            "start1": np.int64,
            "end1": np.int64,
            "chrom2": str,
            "start2": np.int64,
            "end2": np.int64,
        }
        if names is None:
            kwargs = dict(
                header=None,
                usecols=default_cols,
                dtype=dtypes,
                names=bedpe_cols,
            )
        else:
            kwargs = dict(header="infer", usecols=bedpe_cols)
    elif features_format.lower() == "bed":
        default_cols = [0, 1, 2]
        bed_cols = ["chrom", "start", "end"]
        dtypes = {"chrom": str, "start": np.int64, "end": np.int64}
        if names is None:
            kwargs = dict(
                header=None,
                names=bed_cols,
            )
        else:
            kwargs = dict(header="infer", usecols=bed_cols)
    else:
        raise ValueError(
            "Automatic detection of features format is not implemented yet. "
            "Please provide BED or BEDPE as --features-format"
        )

    # column selection and dtypes are carried in kwargs, so they are not
    # repeated here (repeating them would raise a duplicate-keyword TypeError):
    features_df = pd.read_table(buf, comment="#", verbose=verbose, **kwargs)

    ###### Define view for cis compartment-calling
    # use input "view" BED file or all chromosomes mentioned in "track":
    if view is None:
        # Generate viewframe from clr.chromsizes:
        view_df = bioframe.make_viewframe(
            [(chrom, 0, clr.chromsizes[chrom]) for chrom in clr.chromnames]
        )
        if not bioframe.is_contained(features_df, view_df):
            raise ValueError("Features are not contained in chromosomes bounds")
    else:
        # Make viewframe out of table:
        # Read view_df:
        try:
            view_df = bioframe.read_table(view, schema="bed4", index_col=False)
        except Exception:
            view_df = bioframe.read_table(view, schema="bed3", index_col=False)
        # Convert view_df to viewframe:
        try:
            view_df = bioframe.make_viewframe(view_df, check_bounds=clr.chromsizes)
        except ValueError as e:
            raise ValueError(
                "View table is incorrect, please, comply with the format. "
            ) from e
        if not bioframe.is_contained(features_df, view_df):
            raise ValueError("Features are not contained in view bounds")

    ##### Read expected, should be cis-expected:
    if expected is not None:
        expected_path, expected_value_col = expected
        expected_summary_cols = [
            expected_value_col,
        ]
        expected = read_expected(
            expected_path,
            contact_type="cis",
            expected_value_cols=expected_summary_cols,
            verify_view=view_df,
            verify_cooler=clr,
        )

    ##### Create the pileup:
    stack = snipping.pileup(
        clr,
        features_df,
        view_df=view_df,
        expected_df=expected,
        flank=flank,
        min_diag=ignore_diags,  # TODO: implement in pileup API
        clr_weight_name=weight_name,  # TODO: implement in pileup API
        force=force,  # TODO: implement in pileup API
        nproc=nproc,
    )

    ##### Aggregate the signal:
    if aggregate is None:
        aggregate = "mean"
    aggregate = aggregate.lower()
    if aggregate == "mean" or aggregate == "none":
        agg_func = np.nanmean
    elif aggregate == "median":
        agg_func = np.nanmedian
    elif aggregate == "min":
        agg_func = np.nanmin
    elif aggregate == "max":
        agg_func = np.nanmax
    elif aggregate == "std":
        agg_func = np.nanstd
    else:
        raise ValueError(
            f"Aggregation mode {aggregate} not supported. Please use mean/median/min/max/std."
        )

    pileup = agg_func(stack, axis=2)

    ##### Store the data (NPZ or HDF5):
    if out_format.lower() == "npz":
        # the full stack of snips is stored only when requested:
        if store_snips:
            np.savez(out, pileup=pileup, stack=stack)
        else:
            np.savez(out, pileup=pileup)
    elif out_format.lower() == "hdf5":
        h5 = h5py.File(out, "w")
        h5.create_dataset("pileup", data=pileup)
        if store_snips:
            h5.create_dataset("stack", data=stack)
def compute_saddle(
    cool_path,
    track_path,
    expected_path,
    contact_type,
    min_dist,
    max_dist,
    n_bins,
    vrange,
    qrange,
    clr_weight_name,
    strength,
    view,
    out_prefix,
    fig,
    scale,
    cmap,
    vmin,
    vmax,
    hist_color,
    verbose,
):
    """
    Calculate saddle statistics and generate saddle plots for an arbitrary
    signal track on the genomic bins of a contact matrix.

    COOL_PATH : The paths to a .cool file with a balanced Hi-C map. Use the
    '::' syntax to specify a group path in a multicooler file.

    TRACK_PATH : The path to bedGraph-like file with a binned compartment track
    (eigenvector), including a header. Use the '::' syntax to specify a column
    name.

    EXPECTED_PATH : The paths to a tsv-like file with expected signal,
    including a header. Use the '::' syntax to specify a column name.

    Analysis will be performed for chromosomes referred to in TRACK_PATH, and
    therefore these chromosomes must be a subset of chromosomes referred to in
    COOL_PATH and EXPECTED_PATH.

    COOL_PATH, TRACK_PATH and EXPECTED_PATH must be binned at the same
    resolution (except for EXPECTED_PATH in case of trans contact type).

    EXPECTED_PATH must contain at least the following columns for cis contacts:
    'chrom', 'diag', 'n_valid', value_name and the following columns for trans
    contacts: 'chrom1', 'chrom2', 'n_valid', value_name. value_name is
    controlled using options. Header must be present in a file.
    """
    #### Read inputs: ####
    clr = cooler.Cooler(cool_path)
    expected_path, expected_value_col = expected_path
    track_path, track_name = track_path

    #### Read track: ####
    # read bedGraph-file :
    track_columns = ["chrom", "start", "end", track_name]
    # specify dtype as a rudimentary form of validation:
    track_dtype = {
        "chrom": str,
        "start": np.int64,
        "end": np.int64,
        track_name: np.float64,
    }
    track = pd.read_table(
        track_path,
        usecols=track_columns,
        dtype=track_dtype,
        comment=None,
        verbose=verbose,
    )

    #### Generate viewframes ####
    # 1: cooler_view_df. Generate viewframe from clr.chromsizes:
    cooler_view_df = bioframe.make_viewframe(clr.chromsizes)

    # 2: view_df. Define global view for calculating saddles
    #    use input "view" BED file or all chromosomes :
    view_df = cooler_view_df if (view is None) else read_viewframe(view, cooler_view_df)

    # 3: track_view_df. Generate viewframe from track table:
    track_view_df = bioframe.make_viewframe(
        [
            (group.chrom.iloc[0], np.nanmin(group.start), np.nanmax(group.end))
            for i, group in track.reset_index().groupby("chrom")
        ]
    )

    #### Read expected: ####
    expected_summary_cols = [
        expected_value_col,
    ]
    expected = read_expected(
        expected_path,
        contact_type=contact_type,
        expected_value_cols=expected_summary_cols,
        verify_view=view_df,
        verify_cooler=clr,
    )
    # add checks to make sure cis-expected is symmetric

    #############################################
    # CROSS-VALIDATE viewframes of COOLER, TRACK and EXPECTED:
    # Scheme: view <= {track_view, expected} <= cooler_view
    #############################################
    # Track is contained in cooler bounds, but not vice versa
    # (because the cooler may have more regions):
    if not bioframe.is_contained(track_view_df, cooler_view_df):
        raise ValueError("Track regions are not contained in cooler chromsizes bounds")
    # View is contained in track bounds, but not vice versa
    # (because the track may have more regions):
    if not bioframe.is_contained(view_df, track_view_df):
        raise ValueError("View table does not have some regions annotated in the track")
    #############################################
    # CROSS-VALIDATION IS COMPLETE.
    #############################################

    if min_dist < 0:
        min_diag = 3
    else:
        min_diag = int(np.ceil(min_dist / clr.binsize))

    if max_dist >= 0:
        max_diag = int(np.floor(max_dist / clr.binsize))
    else:
        max_diag = -1

    track = saddle.mask_bad_bins((track, track_name), (clr.bins()[:], clr_weight_name))

    if vrange[0] is None:
        vrange = None
    if qrange[0] is None:
        qrange = None

    digitized, binedges = saddle.get_digitized(
        track[["chrom", "start", "end", track_name]],
        n_bins,
        vrange=vrange,
        qrange=qrange,
        digitized_suffix=".d",
    )

    S, C = saddle.get_saddle(
        clr,
        expected,
        digitized[["chrom", "start", "end", track_name + ".d"]],
        contact_type,
        view_df=view_df,
        clr_weight_name=clr_weight_name,
        expected_value_col=expected_value_col,
        view_name_col="name",
        min_diag=min_diag,
        max_diag=max_diag,
        verbose=verbose,
    )
    saddledata = S / C

    to_save = dict(
        saddledata=saddledata,
        binedges=binedges,
        digitized=digitized,
        saddlecounts=C,
    )

    if strength:
        ratios = saddle.saddle_strength(S, C)
        ratios = ratios[1:-1]  # drop outlier bins
        to_save["saddle_strength"] = ratios

    # Save data
    np.savez(out_prefix + ".saddledump", **to_save)  # .npz auto-added
    digitized.to_csv(out_prefix + ".digitized.tsv", sep="\t", index=False)

    # Generate figure
    if len(fig):
        try:
            import matplotlib as mpl

            mpl.use("Agg")  # savefig only for now
            import matplotlib.pyplot as plt
        except ImportError:
            print("Install matplotlib to use ", file=sys.stderr)
            sys.exit(1)

        if hist_color is None:
            color = (
                0.41568627450980394,
                0.8,
                0.39215686274509803,
            )  # sns.color_palette('muted')[2]
        else:
            color = mpl.colors.colorConverter.to_rgb(hist_color)

        title = op.basename(cool_path) + " ({})".format(contact_type)

        if qrange is not None:
            track_label = track_name + " quantiles"
        else:
            track_label = track_name

        clabel = "(contact frequency / expected)"

        saddle.saddleplot(
            track,
            saddledata,
            n_bins,
            vrange=vrange,
            qrange=qrange,
            scale=scale,
            vmin=vmin,
            vmax=vmax,
            color=color,
            title=title,
            xlabel=track_label,
            ylabel=track_label,
            clabel=clabel,
            cmap=cmap,
        )

        for ext in fig:
            plt.savefig(out_prefix + "." + ext, bbox_inches="tight")
def pileup(
    cool_path,
    features,
    view,
    expected,
    flank,
    features_format,
    clr_weight_name,
    out,
    out_format,
    store_snips,
    nproc,
    ignore_diags,
    aggregate,
    verbose,
):
    """
    Perform retrieval of the snippets from .cool file.

    COOL_PATH : The paths to a .cool file with a balanced Hi-C map. Use the
    '::' syntax to specify a group path in a multicooler file.

    FEATURES_PATH : the path to a BED or BEDPE-like file that contains features
    for snipping windows. If BED, then the features are on-diagonal. If BEDPE,
    then the features can be off-diagonal (but not in trans or between
    different regions in the view).
    """
    clr = cooler.Cooler(cool_path)
    cooler_view_df = make_cooler_view(clr)

    #### Read the features:
    buf, names = sniff_for_header(features)
    if features_format.lower() == "bedpe":
        default_cols = [0, 1, 2, 3, 4, 5]
        bedpe_cols = ["chrom1", "start1", "end1", "chrom2", "start2", "end2"]
        dtypes = {
            "chrom1": str,
            "start1": np.int64,
            "end1": np.int64,
            "chrom2": str,
            "start2": np.int64,
            "end2": np.int64,
        }
        if names is None:
            kwargs = dict(
                header=None,
                usecols=default_cols,
                dtype=dtypes,
                names=bedpe_cols,
            )
        else:
            kwargs = dict(header="infer", usecols=bedpe_cols)
    elif features_format.lower() == "bed":
        default_cols = [0, 1, 2]
        bed_cols = ["chrom", "start", "end"]
        dtypes = {"chrom": str, "start": np.int64, "end": np.int64}
        if names is None:
            kwargs = dict(
                header=None,
                names=bed_cols,
            )
        else:
            kwargs = dict(header="infer", usecols=bed_cols)
    else:
        raise NotImplementedError(
            "Automatic detection of features format is not implemented yet. "
            "Please provide BED or BEDPE as --features-format"
        )

    # column selection and dtypes are carried in kwargs, so they are not
    # repeated here (repeating them would raise a duplicate-keyword TypeError):
    features_df = pd.read_table(buf, comment="#", verbose=verbose, **kwargs)

    ###### Define view
    if view is None:
        # full chromosome case
        view_df = cooler_view_df
    else:
        # Read view_df dataframe, and verify against cooler
        view_df = read_viewframe_from_file(view, clr, check_sorting=True)

    # make sure features are compatible with the view_df
    if not bioframe.is_contained(features_df, view_df):
        raise ValueError("Features are not contained in view bounds")

    ##### Read expected, should be cis-expected:
    if expected is None:
        expected_value_col = None
    else:
        expected_path, expected_value_col = expected
        expected_value_cols = [
            expected_value_col,
        ]
        expected = read_expected_from_file(
            expected_path,
            contact_type="cis",
            expected_value_cols=expected_value_cols,
            verify_view=view_df,
            verify_cooler=clr,
        )

    ##### Create the pileup:
    stack = api.snipping.pileup(
        clr,
        features_df,
        view_df=view_df,
        expected_df=expected,
        expected_value_col=expected_value_col,
        flank=flank,
        min_diag=ignore_diags,
        clr_weight_name=clr_weight_name,
        nproc=nproc,
    )

    ##### Aggregate the signal:
    if aggregate is None:
        aggregate = "mean"
    aggregate = aggregate.lower()
    if aggregate == "mean" or aggregate == "none":
        agg_func = np.nanmean
    elif aggregate == "median":
        agg_func = np.nanmedian
    elif aggregate == "min":
        agg_func = np.nanmin
    elif aggregate == "max":
        agg_func = np.nanmax
    elif aggregate == "std":
        agg_func = np.nanstd
    else:
        raise ValueError(
            f"Aggregation mode {aggregate} not supported. Please use mean/median/min/max/std."
        )

    pileup = agg_func(stack, axis=2)

    ##### Store the data (NPZ or HDF5):
    if out_format.lower() == "npz":
        # the full stack of snips is stored only when requested:
        if store_snips:
            np.savez(out, pileup=pileup, stack=stack)
        else:
            np.savez(out, pileup=pileup)
    elif out_format.lower() == "hdf5":
        h5 = h5py.File(out, "w")
        h5.create_dataset("pileup", data=pileup)
        if store_snips:
            h5.create_dataset("stack", data=stack)