Example #1
def test_write_patterns(self):
    """Test if pattern coordinates are saved to disk as expected."""
    # Generate dummy pattern list
    res, n_patterns = 5000, 100000
    chrom_names = ["c1", "c2", "c3"]
    bins_per_chrom = [
        n_patterns // 3,
        n_patterns // 3,
        n_patterns // 3 + n_patterns % 3,
    ]
    tmp_coords = pd.DataFrame(
        {
            "chr1": np.repeat(chrom_names, bins_per_chrom),
            "start1": range(0, res * n_patterns, res),
            "end1": range(res, res * (n_patterns + 1), res),
            "chr2": np.repeat(chrom_names, bins_per_chrom),
            "start2": range(0, res * n_patterns, res),
            "end2": range(res, res * (n_patterns + 1), res),
            "bin1": range(n_patterns),
            "bin2": range(1, n_patterns + 1),
            "kernel_id": 0,
            "iteration": 0,
            "score": np.random.randint(0, 100, n_patterns),
        }
    )
    for dec in range(1, 5):
        cio.write_patterns(
            tmp_coords, self.tmp_file, self.tmp_dir, dec=dec
        )
        obs_coords = pd.read_csv(self.tmp_path + ".txt", sep="\t")
        assert obs_coords.shape == tmp_coords.shape
        assert np.all(
            np.isclose(obs_coords.score, np.round(tmp_coords.score, dec))
        )
        os.unlink(self.tmp_path + ".txt")
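This test exercises the round trip through cio.write_patterns: scores are rounded to dec decimals on write and compared after reading the file back. Below is a minimal standalone sketch of the same round trip. It assumes cio is chromosight.utils.io and reuses the write_patterns(coords, name, output_dir, dec=...) signature and the <output_dir>/<name>.txt tab-separated output convention seen in the test; none of this is taken from official documentation.

import os

import numpy as np
import pandas as pd
import chromosight.utils.io as cio  # assumed import path behind the `cio` alias

# Two dummy patterns; columns mirror those built in the test above
coords = pd.DataFrame(
    {
        "chr1": ["c1", "c1"], "start1": [0, 5000], "end1": [5000, 10000],
        "chr2": ["c1", "c1"], "start2": [0, 5000], "end2": [5000, 10000],
        "bin1": [0, 1], "bin2": [1, 2],
        "kernel_id": 0, "iteration": 0, "score": [3.14159, 2.71828],
    }
)
cio.write_patterns(coords, "demo_out", ".", dec=2)  # assumed signature, as in the test
obs = pd.read_csv("demo_out.txt", sep="\t")
assert np.allclose(obs.score, coords.score.round(2))
os.unlink("demo_out.txt")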
Example #2
def cmd_detect(arguments):
    # Parse command line arguments for detect
    kernel_config_path = arguments["--kernel-config"]
    dump = arguments["--dump"]
    interchrom = arguments["--inter"]
    iterations = arguments["--iterations"]
    mat_path = arguments["<contact_map>"]
    max_dist = arguments["--max-dist"]
    min_dist = arguments["--min-dist"]
    min_separation = arguments["--min-separation"]
    n_mads = float(arguments["--n-mads"])
    pattern = arguments["--pattern"]
    perc_undetected = arguments["--perc-undetected"]
    precision = arguments["--precision"]
    resize = arguments["--resize-kernel"]
    threads = arguments["--threads"]
    output = arguments["<output>"]
    win_fmt = arguments["--win-fmt"]
    subsample = arguments["--subsample"]
    if subsample == "no":
        subsample = None
    plotting_enabled = not arguments["--no-plotting"]
    smooth_trend = arguments["--smooth-trend"]
    if smooth_trend is None:
        smooth_trend = False
    # If output is not specified, use current directory
    if not output:
        output = pathlib.Path()
    else:
        output = pathlib.Path(output)
    output.mkdir(exist_ok=True)

    if win_fmt not in ["npy", "json"]:
        sys.stderr.write("Error: --win-fmt must be either json or npy.\n")
        sys.exit(1)
    # Read a user-provided kernel config if custom is true.
    # Else, load a preset kernel config for the input pattern.
    # Configs are JSON files containing all parameters associated with the
    # pattern. They are loaded into a dictionary of the form:
    # {"max_iterations": 3, "kernels": [kernel1, kernel2, ...], ...}
    # where each kernel is a 2D numpy array representing the pattern
    if kernel_config_path is not None:
        custom = True
        # Loading input path as config
        config_path = kernel_config_path
    else:
        custom = False
        # Will use a preset config file matching pattern name
        config_path = pattern

    ### 0: LOAD INPUT
    params = {
        "max_iterations": (iterations, int),
        "precision": (precision, float),
        "max_dist": (max_dist, int),
        "min_dist": (min_dist, int),
        "min_separation": (min_separation, int),
        "max_perc_undetected": (perc_undetected, float),
    }
    kernel_config = cio.load_kernel_config(config_path, custom)
    for param_name, (param_value, param_type) in params.items():
        kernel_config = _override_kernel_config(
            param_name, param_value, param_type, kernel_config
        )

    # NOTE: Temporary warning
    if interchrom:
        sys.stderr.write(
            "WARNING: Detection on interchromosomal matrices is expensive in RAM\n"
        )
    hic_genome = HicGenome(
        mat_path,
        inter=interchrom,
        kernel_config=kernel_config,
        dump=dump,
        smooth=smooth_trend,
    )
    ### 1: Process input signal
    #  Adapt size of kernel matrices based on the signal resolution
    if resize:
        for i, mat in enumerate(kernel_config["kernels"]):
            kernel_config["kernels"][i] = resize_kernel(
                mat,
                kernel_res=kernel_config["resolution"],
                signal_res=hic_genome.resolution,
            )
    hic_genome.kernel_config = kernel_config
    # Subsample Hi-C contacts from the matrix, if requested
    # NOTE: Subsampling has to be done before normalisation
    hic_genome.subsample(subsample)
    # Normalize (balance) matrix using ICE
    hic_genome.normalize(n_mads=n_mads)
    # Define how many diagonals should be used in intra-matrices
    hic_genome.compute_max_dist()
    # Split whole genome matrix into intra- and inter- sub matrices. Each sub
    # matrix is processed on the fly (obs / exp, trimming diagonals > max dist)
    hic_genome.make_sub_matrices()

    all_pattern_coords = []
    all_pattern_windows = []

    ### 2: DETECTION ON EACH SUBMATRIX
    pool = mp.Pool(int(threads))
    n_sub_mats = hic_genome.sub_mats.shape[0]
    # Loop over the different kernel matrices for input pattern
    run_id = 0
    total_runs = (
        len(kernel_config["kernels"]) * kernel_config["max_iterations"]
    )
    sys.stderr.write("Detecting patterns...\n")
    for kernel_id, kernel_matrix in enumerate(kernel_config["kernels"]):
        # Adjust kernel iteratively
        for i in range(kernel_config["max_iterations"]):
            cio.progress(
                run_id, total_runs, f"Kernel: {kernel_id}, Iteration: {i}\n"
            )

            # Apply detection procedure to all sub matrices in parallel
            sub_mat_data = zip(
                hic_genome.sub_mats.iterrows(),
                [kernel_config for i in range(n_sub_mats)],
                [kernel_matrix for i in range(n_sub_mats)],
                [dump for i in range(n_sub_mats)],
            )
            # Run detection in parallel on different sub matrices, and show progress when
            # gathering results
            sub_mat_results = []
            for s, result in enumerate(
                pool.imap_unordered(_detect_sub_mat, sub_mat_data, 1)
            ):
                # imap_unordered yields results in completion order, so read
                # chromosome labels from the result itself instead of indexing
                # hic_genome.sub_mats; also avoid shadowing the iteration
                # counter `i` used below
                cio.progress(s, n_sub_mats, f"{result['chr1']}-{result['chr2']}")
                sub_mat_results.append(result)
            # Convert coordinates from chromosome to whole genome bins
            kernel_coords = [
                hic_genome.get_full_mat_pattern(
                    d["chr1"], d["chr2"], d["coords"]
                )
                for d in sub_mat_results
                if d["coords"] is not None
            ]

            # Gather newly detected pattern coordinates
            try:
                # Extract surrounding windows for each sub_matrix
                kernel_windows = np.concatenate(
                    [
                        w["windows"]
                        for w in sub_mat_results
                        if w["windows"] is not None
                    ],
                    axis=0,
                )
                all_pattern_coords.append(
                    pd.concat(kernel_coords, axis=0).reset_index(drop=True)
                )
                # Add info about kernel and iteration which detected these patterns
                all_pattern_coords[-1]["kernel_id"] = kernel_id
                all_pattern_coords[-1]["iteration"] = i
                all_pattern_windows.append(kernel_windows)

            # If no pattern was found with this kernel, skip the remaining
            # iterations and move on to the next kernel
            except ValueError:
                break

            # Update kernel with patterns detected at current iteration
            kernel_matrix = cid.pileup_patterns(kernel_windows)
            run_id += 1
    cio.progress(run_id, total_runs, f"Kernel: {kernel_id}, Iteration: {i}\n")
    # Detection is done; release the worker processes
    pool.close()

    # If no pattern detected on any chromosome, with any kernel, exit gracefully
    if len(all_pattern_coords) == 0:
        sys.stderr.write("No pattern detected ! Exiting.\n")
        sys.exit(0)

    # Combine patterns of all kernel matrices into a single array
    all_pattern_coords = pd.concat(all_pattern_coords, axis=0).reset_index(
        drop=True
    )
    # Combine all windows from different kernels into a single pile of windows
    all_pattern_windows = np.concatenate(all_pattern_windows, axis=0)

    # Compute minimum separation in bins and make sure it has a reasonable value
    separation_bins = int(
        kernel_config["min_separation"] // hic_genome.resolution
    )
    if separation_bins < 1:
        separation_bins = 1
    print(f"Minimum pattern separation is : {separation_bins}")
    # Remove patterns with overlapping windows (smeared patterns)
    distinct_patterns = cid.remove_neighbours(
        all_pattern_coords, win_size=separation_bins
    )

    # Drop patterns that are too close to each other
    all_pattern_coords = all_pattern_coords.loc[distinct_patterns, :]
    all_pattern_windows = all_pattern_windows[distinct_patterns, :, :]

    # Get from bins into basepair coordinates
    coords_1 = hic_genome.bins_to_coords(all_pattern_coords.bin1).reset_index(
        drop=True
    )
    coords_1.columns = [str(col) + "1" for col in coords_1.columns]
    coords_2 = hic_genome.bins_to_coords(all_pattern_coords.bin2).reset_index(
        drop=True
    )
    coords_2.columns = [str(col) + "2" for col in coords_2.columns]

    all_pattern_coords = pd.concat(
        [all_pattern_coords.reset_index(drop=True), coords_1, coords_2], axis=1
    )

    # Filter patterns closer than minimum distance from the diagonal if any
    min_dist_drop_mask = (
        all_pattern_coords.chrom1 == all_pattern_coords.chrom2
    ) & (
        np.abs(all_pattern_coords.start2 - all_pattern_coords.start1)
        < int(kernel_config["min_dist"])
    )
    # Reorder columns at the same time
    all_pattern_coords = all_pattern_coords.loc[
        ~min_dist_drop_mask,
        [
            "chrom1",
            "start1",
            "end1",
            "chrom2",
            "start2",
            "end2",
            "bin1",
            "bin2",
            "kernel_id",
            "iteration",
            "score",
        ],
    ]
    all_pattern_windows = all_pattern_windows[~min_dist_drop_mask, :, :]

    ### 3: WRITE OUTPUT
    sys.stderr.write(f"{all_pattern_coords.shape[0]} patterns detected\n")
    # Save patterns and their coordinates in a tsv file
    cio.write_patterns(
        all_pattern_coords, kernel_config["name"] + "_out", output
    )
    # Save windows as an array in an npy file
    cio.save_windows(
        all_pattern_windows,
        kernel_config["name"] + "_out",
        output,
        format=win_fmt,
    )

    # Generate pileup visualisations if requested
    if plotting_enabled:
        # Compute and plot pileup
        pileup_fname = ("pileup_of_{n}_{pattern}").format(
            pattern=kernel_config["name"], n=all_pattern_windows.shape[0]
        )
        windows_pileup = cid.pileup_patterns(all_pattern_windows)
        pileup_plot(windows_pileup, name=pileup_fname, output=output)
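For reference, the comments in cmd_detect describe the kernel config as a JSON file loaded into a dictionary. The sketch below illustrates that structure using only keys this function actually reads (kernels, resolution, name, min_separation, min_dist, plus the parameters overridden through _override_kernel_config); the concrete values and the 3x3 kernel are invented for illustration.

import numpy as np

# Illustrative kernel config, shaped like the dictionary returned by
# cio.load_kernel_config. All values below are made up.
kernel_config = {
    "name": "loops",               # used to name output files
    "resolution": 10000,           # resolution the kernels were built at (bp)
    "max_iterations": 3,           # refinement rounds per kernel
    "precision": 4.0,              # detection stringency
    "max_dist": 2_000_000,         # max scanning distance from the diagonal (bp)
    "min_dist": 0,                 # min distance from the diagonal (bp)
    "min_separation": 10000,       # merge patterns closer than this (bp)
    "max_perc_undetected": 10.0,   # tolerated % of missing pixels in a window
    # Each kernel is a 2D numpy array representing the pattern
    "kernels": [np.ones((3, 3)) / 9],
}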
Example #3
def cmd_detect(args):
    # Parse command line arguments for detect
    dump = args["--dump"]
    norm = args["--norm"]
    interchrom = args["--inter"]
    iterations = args["--iterations"]
    kernel_config_path = args["--kernel-config"]
    mat_path = args["<contact_map>"]
    max_dist = args["--max-dist"]
    min_dist = args["--min-dist"]
    min_separation = args["--min-separation"]
    n_mads = float(args["--n-mads"])
    prefix = args["<prefix>"]
    pattern = args["--pattern"]
    pearson = args["--pearson"]
    perc_zero = args["--perc-zero"]
    perc_undetected = args["--perc-undetected"]
    subsample = args["--subsample"]
    threads = int(args["--threads"])
    tsvd = 0.999 if args["--tsvd"] else None
    win_fmt = args["--win-fmt"]
    win_size = args["--win-size"]
    if subsample == "no":
        subsample = None
    plotting_enabled = not args["--no-plotting"]
    smooth_trend = args["--smooth-trend"]
    if smooth_trend is None:
        smooth_trend = False

    # If prefix involves a directory, crash if it does not exist
    cio.check_prefix_dir(prefix)

    if win_fmt not in ["npy", "json"]:
        sys.stderr.write("Error: --win-fmt must be either json or npy.\n")
        sys.exit(1)
    # Read a user-provided kernel config if custom is true.
    # Else, load a preset kernel config for the input pattern.
    # Configs are JSON files containing all parameters associated with the
    # pattern. They are loaded into a dictionary of the form:
    # {"max_iterations": 3, "kernels": [kernel1, kernel2, ...], ...}
    # where each kernel is a 2D numpy array representing the pattern
    if kernel_config_path is not None:
        custom = True
        # Loading input path as config
        config_path = kernel_config_path
    else:
        custom = False
        # Will use a preset config file matching pattern name
        config_path = pattern

    ### 0: LOAD INPUT
    params = {
        "max_iterations": (iterations, int),
        "pearson": (pearson, float),
        "max_dist": (max_dist, int),
        "min_dist": (min_dist, int),
        "min_separation": (min_separation, int),
        "max_perc_undetected": (perc_undetected, float),
        "max_perc_zero": (perc_zero, float),
    }
    cfg = cio.load_kernel_config(config_path, custom)
    for param_name, (param_value, param_type) in params.items():
        cfg = _override_kernel_config(param_name, param_value, param_type, cfg)

    # Resize kernels if requested
    if win_size != "auto":
        win_size = int(win_size)
        if not win_size % 2:
            raise ValueError("--win-size must be odd")
        resize = lambda m: resize_kernel(m, factor=win_size / m.shape[0])
        cfg["kernels"] = [resize(k) for k in cfg["kernels"]]

    if interchrom:
        sys.stderr.write(
            "WARNING: Detection on interchromosomal matrices is expensive in RAM\n"
        )
    hic_genome = HicGenome(
        mat_path,
        inter=interchrom,
        kernel_config=cfg,
        dump=dump,
        smooth=smooth_trend,
        sample=subsample,
    )
    ### 1: Process input signal
    hic_genome.kernel_config = cfg
    # Normalize (balance) matrix using ICE
    hic_genome.normalize(norm=norm, n_mads=n_mads, threads=threads)
    # Define how many diagonals should be used in intra-matrices
    hic_genome.compute_max_dist()
    # Split whole genome matrix into intra- and inter- sub matrices. Each sub
    # matrix is processed on the fly (obs / exp, trimming diagonals > max dist)
    hic_genome.make_sub_matrices()

    all_coords = []
    all_windows = []

    ### 2: DETECTION ON EACH SUBMATRIX
    n_sub_mats = hic_genome.sub_mats.shape[0]
    # Loop over the different kernel matrices for input pattern
    run_id = 0
    # Use cfg to inform jobs whether they should run full convolution
    cfg["tsvd"] = tsvd
    total_runs = len(cfg["kernels"]) * cfg["max_iterations"]
    sys.stderr.write("Detecting patterns...\n")
    for kernel_id, kernel_matrix in enumerate(cfg["kernels"]):
        # Adjust kernel iteratively
        for i in range(cfg["max_iterations"]):
            cio.progress(
                run_id, total_runs, f"Kernel: {kernel_id}, Iteration: {i}\n"
            )

            # Apply detection procedure to all sub matrices in parallel
            sub_mat_data = zip(
                hic_genome.sub_mats.iterrows(),
                [cfg for i in range(n_sub_mats)],
                [kernel_matrix for i in range(n_sub_mats)],
                [dump for i in range(n_sub_mats)],
            )
            # Run detection in parallel on different sub matrices, and show progress when
            # gathering results
            sub_mat_results = []
            # Run in multiprocessing subprocesses
            if threads > 1:
                pool = mp.Pool(threads)
                dispatcher = pool.imap(_detect_sub_mat, sub_mat_data, 1)
            else:
                dispatcher = map(_detect_sub_mat, sub_mat_data)
            for s, result in enumerate(dispatcher):
                cio.progress(s, n_sub_mats, f"{result['chr1']}-{result['chr2']}")
                sub_mat_results.append(result)

            # Convert coordinates from chromosome to whole genome bins
            kernel_coords = [
                hic_genome.get_full_mat_pattern(
                    d["chr1"], d["chr2"], d["coords"]
                )
                for d in sub_mat_results
                if d["coords"] is not None
            ]

            # Gather newly detected pattern coordinates
            try:
                # Extract surrounding windows for each sub_matrix
                kernel_windows = np.concatenate(
                    [
                        w["windows"]
                        for w in sub_mat_results
                        if w["windows"] is not None
                    ],
                    axis=0,
                )
                all_coords.append(
                    pd.concat(kernel_coords, axis=0).reset_index(drop=True)
                )
                # Add info about kernel and iteration which detected these patterns
                all_coords[-1]["kernel_id"] = kernel_id
                all_coords[-1]["iteration"] = i
                all_windows.append(kernel_windows)

            # If no pattern was found with this kernel, skip the remaining
            # iterations and move on to the next kernel
            except ValueError:
                break

            # Update kernel with patterns detected at current iteration
            kernel_matrix = cid.pileup_patterns(kernel_windows)
            run_id += 1
    cio.progress(run_id, total_runs, f"Kernel: {kernel_id}, Iteration: {i}\n")
    # If no pattern detected on any chromosome, with any kernel, exit gracefully
    if len(all_coords) == 0:
        sys.stderr.write("No pattern detected ! Exiting.\n")
        sys.exit(0)
    # Finish parallelized part
    if threads > 1:
        pool.close()
    # Combine patterns of all kernel matrices into a single array
    all_coords = pd.concat(all_coords, axis=0).reset_index(drop=True)
    # Combine all windows from different kernels into a single pile of windows
    all_windows = np.concatenate(all_windows, axis=0)

    # Compute minimum separation in bins and make sure it has a reasonable value
    separation_bins = int(cfg["min_separation"] // hic_genome.clr.binsize)
    if separation_bins < 1:
        separation_bins = 1
    print(f"Minimum pattern separation is : {separation_bins}")
    # Remove patterns with overlapping windows (smeared patterns)
    distinct_patterns = cid.remove_neighbours(
        all_coords, win_size=separation_bins
    )

    # Drop patterns that are too close to each other
    all_coords = all_coords.loc[distinct_patterns, :]
    all_windows = all_windows[distinct_patterns, :, :]

    # Get from bins into basepair coordinates
    coords_1 = hic_genome.bins_to_coords(all_coords.bin1).reset_index(
        drop=True
    )
    coords_1.columns = [str(col) + "1" for col in coords_1.columns]
    coords_2 = hic_genome.bins_to_coords(all_coords.bin2).reset_index(
        drop=True
    )
    coords_2.columns = [str(col) + "2" for col in coords_2.columns]

    all_coords = pd.concat(
        [all_coords.reset_index(drop=True), coords_1, coords_2], axis=1
    )

    # Filter patterns closer than minimum distance from the diagonal if any
    min_dist_drop_mask = (all_coords.chrom1 == all_coords.chrom2) & (
        np.abs(all_coords.start2 - all_coords.start1) < cfg["min_dist"]
    )
    all_coords = all_coords.loc[~min_dist_drop_mask, :]
    all_windows = all_windows[~min_dist_drop_mask, :, :]
    del min_dist_drop_mask

    # Remove patterns with nan p-values (no contact in window)
    pval_mask = all_coords.pvalue.isnull()
    all_coords = all_coords.loc[~pval_mask, :]
    all_windows = all_windows[~pval_mask, :, :]
    del pval_mask
    # Correct p-values for multiple testing using FDR
    all_coords["qvalue"] = fdr_correction(all_coords["pvalue"])
    # Reorder columns
    all_coords = all_coords.loc[
        :,
        [
            "chrom1",
            "start1",
            "end1",
            "chrom2",
            "start2",
            "end2",
            "bin1",
            "bin2",
            "kernel_id",
            "iteration",
            "score",
            "pvalue",
            "qvalue",
        ],
    ]

    ### 3: WRITE OUTPUT
    sys.stderr.write(f"{all_coords.shape[0]} patterns detected\n")
    # Save patterns and their coordinates in a tsv file
    sys.stderr.write(f"Saving patterns in {prefix}.tsv\n")
    cio.write_patterns(all_coords, prefix)
    # Save windows as an array in an npy or json file
    sys.stderr.write(f"Saving windows in {prefix}.{win_fmt}\n")
    cio.save_windows(all_windows, prefix, fmt=win_fmt)

    # Generate pileup visualisations if requested
    if plotting_enabled:
        # Compute and plot pileup
        pileup_title = ("Pileup of {n} {pattern}").format(
            pattern=cfg["name"], n=all_windows.shape[0]
        )
        windows_pileup = cid.pileup_patterns(all_windows)
        # Symmetrize pileup for diagonal patterns
        if not cfg["max_dist"]:
            # Replace nan below diag by 0
            windows_pileup = np.nan_to_num(windows_pileup)
            # Add transpose
            windows_pileup += np.transpose(windows_pileup) - np.diag(
                np.diag(windows_pileup)
            )
        sys.stderr.write(f"Saving pileup plots in {prefix}.pdf\n")
        pileup_plot(windows_pileup, prefix, name=pileup_title)
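The symmetrization step at the end of cmd_detect rebuilds a full symmetric pileup from an upper-triangular one by adding the transpose and subtracting the doubled diagonal. A self-contained numpy demo of that exact operation (the 3x3 values are invented):

import numpy as np

# Upper-triangular pileup with NaN below the diagonal, as produced for
# patterns detected near the diagonal (values invented for the demo)
pileup = np.array(
    [
        [2.0, 1.0, 0.5],
        [np.nan, 2.0, 1.0],
        [np.nan, np.nan, 2.0],
    ]
)
pileup = np.nan_to_num(pileup)  # NaNs below the diagonal become 0
# Adding the transpose doubles the diagonal, so subtract one copy of it
pileup += pileup.T - np.diag(np.diag(pileup))
assert np.allclose(pileup, pileup.T)  # now symmetric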
Example #4
def cmd_quantify(args):
    bed2d_path = args["<bed2d>"]
    mat_path = args["<contact_map>"]
    prefix = args["<prefix>"]
    n_mads = float(args["--n-mads"])
    pattern = args["--pattern"]
    inter = args["--inter"]
    kernel_config_path = args["--kernel-config"]
    perc_zero = args["--perc-zero"]
    perc_undetected = args["--perc-undetected"]
    plotting_enabled = not args["--no-plotting"]
    threads = int(args["--threads"])
    norm = args["--norm"]
    tsvd = 0.999 if args["--tsvd"] else None
    win_fmt = args["--win-fmt"]
    if win_fmt not in ["npy", "json"]:
        sys.stderr.write("Error: --win-fmt must be either json or npy.\n")
        sys.exit(1)
    win_size = args["--win-size"]
    if win_size != "auto":
        win_size = int(win_size)
    subsample = args["--subsample"]
    # If prefix involves a directory, crash if it does not exist
    cio.check_prefix_dir(prefix)
    # Load 6 cols from 2D BED file and infer header
    bed2d = cio.load_bed2d(bed2d_path)
    # Warn user if --inter is disabled but list contains inter patterns
    if not inter and (bed2d.chrom1 != bed2d.chrom2).any():
        sys.stderr.write(
            "Warning: The bed2d file contains interchromosomal patterns. "
            "These patterns will not be scanned unless --inter is used.\n"
        )
    if kernel_config_path is not None:
        custom = True
        # Loading input path as config
        config_path = kernel_config_path
    else:
        custom = False
        # Will use a preset config file matching pattern name
        config_path = pattern
    cfg = cio.load_kernel_config(config_path, custom)
    # Subsample Hi-C contacts from the matrix, if requested
    if subsample == "no":
        subsample = None
    # Instantiate and preprocess contact map
    hic_genome = HicGenome(
        mat_path, inter=inter, kernel_config=cfg, sample=subsample
    )
    # enforce max scanning distance to pattern at longest distance
    furthest = np.max(bed2d.start2 - bed2d.start1)
    max_diag = hic_genome.clr.shape[0] * hic_genome.clr.binsize
    cfg["max_dist"] = min(furthest, max_diag)
    cfg["min_dist"] = 0
    cfg["tsvd"] = tsvd
    cfg = _override_kernel_config("max_perc_zero", perc_zero, float, cfg)
    cfg = _override_kernel_config(
        "max_perc_undetected", perc_undetected, float, cfg
    )

    # Notify contact map instance of changes in scanning distance
    hic_genome.kernel_config = cfg
    # Normalize (balance) matrix using ICE
    hic_genome.normalize(norm=norm, n_mads=n_mads, threads=threads)
    # Initialize output structures
    bed2d["score"] = np.nan
    bed2d["pvalue"] = np.nan
    positions = bed2d.copy()
    # Only resize kernel matrices if explicitly requested
    km, kn = cfg["kernels"][0].shape
    n_kernels = len(cfg["kernels"])
    if win_size != "auto":
        if not win_size % 2:
            raise ValueError("--win-size must be odd")
        for i, k in enumerate(cfg["kernels"]):
            cfg["kernels"][i] = resize_kernel(k, factor=win_size / km)
        km = kn = win_size
        # Update kernel config after resizing kernels
        hic_genome.kernel_config = cfg
    # Define how many diagonals should be used in intra-matrices
    hic_genome.compute_max_dist()
    # Split whole genome matrix into intra- and inter- sub matrices. Each sub
    # matrix is processed on the fly (obs / exp, trimming diagonals > max dist)
    hic_genome.make_sub_matrices()
    windows = np.full((positions.shape[0], km, kn), np.nan)
    # We will store a copy of coordinates for each kernel
    bed2d_out = [bed2d.copy() for _ in range(n_kernels)]
    windows_out = [windows.copy() for _ in range(n_kernels)]
    # For each position, we use the center of the BED interval
    positions["pos1"] = (positions.start1 + positions.end1) // 2
    positions["pos2"] = (positions.start2 + positions.end2) // 2
    # Use each kernel matrix available for the pattern
    for kernel_id, kernel_matrix in enumerate(cfg["kernels"]):
        cio.progress(kernel_id, len(cfg["kernels"]), f"Kernel: {kernel_id}\n")
        n_sub_mats = hic_genome.sub_mats.shape[0]
        # Retrieve input positions for each submatrix and convert
        # coordinates from whole genome to submatrix.
        sub_pos = [
            _get_chrom_pos(positions, hic_genome, m[1].chr1, m[1].chr2)
            for m in hic_genome.sub_mats.iterrows()
        ]
        # Apply quantification procedure to all sub matrices in parallel
        sub_mat_data = zip(
            hic_genome.sub_mats.iterrows(),
            [cfg for _ in range(n_sub_mats)],
            [kernel_matrix for _ in range(n_sub_mats)],
            [s[1] for s in sub_pos],
        )
        # Run quantification in parallel on different sub matrices,
        # and show progress when gathering results
        sub_mat_results = []
        # Run in multiprocessing subprocesses
        if threads > 1:
            pool = mp.Pool(threads)
            dispatcher = pool.imap(_quantify_sub_mat, sub_mat_data, 1)
        else:
            dispatcher = map(_quantify_sub_mat, sub_mat_data)
        for s, result in enumerate(dispatcher):
            cio.progress(s, n_sub_mats, f"{result['chr1']}-{result['chr2']}")
            sub_mat_results.append(result)
        if threads > 1:
            # Close the pool before the next kernel to avoid leaking workers
            pool.close()

        for i, r in enumerate(sub_mat_results):
            # If there were no patterns on that sub matrix, just skip it
            if r['coords'] is None:
                continue
            sub_pat_idx = sub_pos[i][0]

            # For each coordinate, keep the highest coefficient
            # among all kernels.
            try:
                bed2d_out[kernel_id].loc[sub_pat_idx, "score"] = (
                    r["coords"].score.values
                )
                bed2d_out[kernel_id].loc[sub_pat_idx, "pvalue"] = (
                    r["coords"].pvalue.values
                )
                windows_out[kernel_id][sub_pat_idx, :, :] = r["windows"]
            # Do nothing if no pattern was detected or matrix
            # is smaller than the kernel (-> patterns is None)
            except AttributeError:
                pass
    # Select the best score for each coordinate (among the different kernels)
    bed2d = pd.concat(bed2d_out, axis=0).reset_index(drop=True)
    windows = np.concatenate(windows_out, axis=0)
    bed2d = (
        bed2d
        .sort_values('score', ascending=True)
        .groupby(['chrom1', 'start1', 'chrom2', 'start2'], sort=False)
        .tail(1)
    )
    windows = windows[bed2d.index, :, :]
    bed2d = bed2d.reset_index(drop=True)
    bed2d["bin1"] = hic_genome.coords_to_bins(
        bed2d.loc[:, ["chrom1", "start1"]].rename(
            columns={"chrom1": "chrom", "start1": "pos"}
        )
    )
    bed2d["bin2"] = hic_genome.coords_to_bins(
        bed2d.loc[:, ["chrom2", "start2"]].rename(
            columns={"chrom2": "chrom", "start2": "pos"}
        )
    )
    bed2d["qvalue"] = fdr_correction(bed2d["pvalue"])
    bed2d = bed2d.loc[
        :,
        [
            "chrom1",
            "start1",
            "end1",
            "chrom2",
            "start2",
            "end2",
            "bin1",
            "bin2",
            "score",
            "pvalue",
            "qvalue",
        ],
    ]
    # Set p-values of invalid scores to nan
    bed2d.loc[np.isnan(bed2d.score), "pvalue"] = np.nan
    bed2d.loc[np.isnan(bed2d.score), "qvalue"] = np.nan
    # Sort by whole genome coordinates to match input order
    bed2d = (
        bed2d
        .sort_values(['bin1', 'bin2'], ascending=True)
        .reset_index(drop=True)
    )
    cio.write_patterns(bed2d, prefix)
    cio.save_windows(windows, prefix, fmt=win_fmt)
    # Generate pileup visualisations if requested
    if plotting_enabled:
        # Compute and plot pileup
        pileup_title = ("pileup_of_{n}_{pattern}").format(
            pattern=cfg["name"], n=windows.shape[0]
        )
        windows_pileup = cid.pileup_patterns(windows)
        # Symmetrize pileup for diagonal patterns
        if not cfg["max_dist"]:
            # Replace nan below diag by 0
            windows_pileup = np.nan_to_num(windows_pileup)
            # Add transpose
            windows_pileup += np.transpose(windows_pileup) - np.diag(
                np.diag(windows_pileup)
            )
        sys.stderr.write(f"Saving pileup plots in {prefix}.pdf\n")
        pileup_plot(windows_pileup, prefix, name=pileup_title)
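cmd_quantify keeps, for each queried coordinate, the best score across kernels using a sort / groupby / tail(1) idiom: sorting ascending puts the highest score last within each group, and tail(1) then keeps exactly that row. A small pandas demonstration with invented data:

import pandas as pd

# Two kernels scored the same two coordinate pairs (values invented)
df = pd.DataFrame(
    {
        "chrom1": ["c1"] * 4,
        "start1": [0, 0, 5000, 5000],
        "chrom2": ["c1"] * 4,
        "start2": [9000, 9000, 20000, 20000],
        "score": [0.2, 0.7, 0.5, 0.1],
    }
)
best = (
    df.sort_values("score", ascending=True)  # lowest scores first...
    .groupby(["chrom1", "start1", "chrom2", "start2"], sort=False)
    .tail(1)  # ...so the last row of each group is its maximum
)
print(sorted(best.score))  # [0.5, 0.7]: the best score per coordinate pair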