def write_multimodal_data(self, data: MultimodalData, overwrite: bool = True) -> None:
    """Persist every modality of ``data`` under ``self.root``.

    Parameters
    ----------
    data: MultimodalData
        Object whose unimodal entries are written one by one through
        ``self.write_unimodal_data``.

    overwrite: ``bool``, optional, default: ``True``
        If truthy, every key reported by ``data.list_data()`` is written and
        the flag is forwarded unchanged. Otherwise only an incremental update
        is performed: keys listed in ``data.data.deleted`` are removed from
        ``self.root`` and keys listed in ``data.data.accessed`` are written,
        with per-key overwrite enabled only for keys found in
        ``data.data.modified``.

    Returns
    -------
    ``None``
    """
    if not overwrite:
        # Incremental mode: drop removed entries first, then refresh the
        # entries that were touched.
        for removed_key in data.data.deleted:
            del self.root[removed_key]
        for touched_key in data.data.accessed:
            # Fetch the data before consulting ``modified`` so that the
            # order of accesses on ``data`` stays the same for each key.
            unidata = data.get_data(touched_key)
            needs_rewrite = touched_key in data.data.modified
            self.write_unimodal_data(self.root, touched_key, unidata, overwrite=needs_rewrite)
    else:
        for modality_key in data.list_data():
            unidata = data.get_data(modality_key)
            self.write_unimodal_data(self.root, modality_key, unidata, overwrite=overwrite)

    # Record which modality is currently selected alongside the data.
    self.root.attrs['_selected'] = data._selected
def write_mtx_file(data: MultimodalData, output_directory: str, precision: int = 2):
    """Write each modality of ``data`` in mtx form below ``output_directory``.

    Parameters
    ----------
    data: MultimodalData
        Object whose keys (``data.list_data()``) are exported one at a time.

    output_directory: ``str``
        Destination directory; it is made absolute and created if missing.

    precision: ``int``, optional, default: ``2``
        Forwarded unchanged to ``_write_mtx``.
    """
    target_root = os.path.abspath(output_directory)
    os.makedirs(target_root, exist_ok=True)

    for modality in data.list_data():
        unidata = data.get_data(modality)
        # Each modality gets its own path named after its key.
        destination = os.path.join(target_root, modality)
        _write_mtx(unidata, destination, precision)

    logger.info("Mtx files are written.")
def _run_filter_data(
    data: MultimodalData,
    focus_list: List[str] = None,
    output_filt: str = None,
    plot_filt: str = None,
    plot_filt_figsize: Tuple[int, int] = None,
    min_genes_before_filt: int = 100,
    select_singlets: bool = False,
    remap_string: str = None,
    subset_string: str = None,
    min_genes: int = 500,
    max_genes: int = 6000,
    min_umis: int = None,
    max_umis: int = None,
    mito_prefix: str = "MT-",
    percent_mito: float = 20.0,
    percent_cells: float = 0.05,
) -> None:
    """Run QC, optional reporting, filtration and robust-gene identification.

    This function is for command line use.

    Parameters
    ----------
    data: MultimodalData
        Object holding the unimodal data to process.

    focus_list: ``List[str]``, optional, default: ``None``
        Keys to process. If ``None``, only ``data.current_key()`` is used.

    output_filt: ``str``, optional, default: ``None``
        If set, one spreadsheet named
        ``<output_filt>.<uid>.filt.xlsx`` is written per processed key, where
        ``<uid>`` is ``unidata.get_uid()``.

    plot_filt: ``str``, optional, default: ``None``
        If set, forwarded to ``_generate_filter_plots`` for each key.

    plot_filt_figsize: ``Tuple[int, int]``, optional, default: ``None``
        Forwarded to ``_generate_filter_plots``.

    min_genes_before_filt: ``int``, optional, default: ``100``
        Forwarded to ``get_filter_stats`` and ``_generate_filter_plots``.

    select_singlets, remap_string, subset_string, min_genes, max_genes, min_umis, max_umis, percent_mito:
        Forwarded positionally, in this order, to ``qc_metrics``.

    mito_prefix: ``str``, optional, default: ``"MT-"``
        Used to build a ``DictWithDefault``; the value looked up with
        ``unidata.get_genome()`` is passed to ``qc_metrics``.
        NOTE(review): presumably this allows a per-genome prefix -- confirm
        against ``DictWithDefault``.

    percent_cells: ``float``, optional, default: ``0.05``
        Forwarded to ``identify_robust_genes``.

    Returns
    -------
    ``None``
    """
    if focus_list is None:
        focus_list = [data.current_key()]

    mito_dict = DictWithDefault(mito_prefix)
    for key in focus_list:
        unidata = data.get_data(key)

        qc_metrics(
            unidata,
            select_singlets,
            remap_string,
            subset_string,
            min_genes,
            max_genes,
            min_umis,
            max_umis,
            mito_dict.get(unidata.get_genome()),
            percent_mito,
        )

        if output_filt is not None:
            group_key = unidata.get_uid()
            # Compute the statistics before opening the workbook so that a
            # failure here does not leave a half-initialised writer behind.
            df_cells = get_filter_stats(
                unidata, min_genes_before_filt=min_genes_before_filt)
            # ExcelWriter.save() was removed in pandas 2.0; the context
            # manager saves and closes the workbook on every pandas version
            # and also releases the file handle if to_excel raises.
            with pd.ExcelWriter(f"{output_filt}.{group_key}.filt.xlsx",
                                engine="xlsxwriter") as writer:
                df_cells.to_excel(writer, sheet_name="Cell filtration stats")
            logger.info(f"Filtration results for {group_key} are written.")

        if plot_filt is not None:
            _generate_filter_plots(unidata,
                                   plot_filt,
                                   plot_filt_figsize=plot_filt_figsize,
                                   min_genes_before_filt=min_genes_before_filt)

    # Filtration runs once over all focused keys, after every per-key QC
    # report/plot above has been produced.
    filter_data(data, focus_list=focus_list)

    for key in focus_list:
        unidata = data.get_data(key)
        identify_robust_genes(unidata, percent_cells=percent_cells)
Exemplo n.º 4
0
 def add_data(self, data: MultimodalData) -> None:
     """Append every unimodal entry of ``data`` to ``self.aggr`` under its key.

     NOTE(review): ``self.aggr[key]`` must already yield an object with
     ``append`` for each key (presumably a ``defaultdict(list)``) -- confirm
     against the class initializer.
     """
     for modality in data.list_data():
         # Resolve the bucket before fetching the data, as the original did.
         bucket = self.aggr[modality]
         bucket.append(data.get_data(modality))
def write_scp_file(data: MultimodalData, output_name: str, is_sparse: bool = True, precision: int = 2) -> None:
    """Generate outputs for single cell portal from a MultimodalData object

    Parameters
    ----------
    data: MultimodalData
        A MultimodalData object.

    output_name: ``str``
        Name prefix for output files.

    is_sparse: ``bool``, optional, default: ``True``
        If ``True``, enforce the count matrix to be sparse after written into files.

    precision: ``int``, optional, default: ``2``
        Round numbers to ``precision`` decimal places.

    Returns
    -------
    ``None``

    Only modalities whose ``"X"`` matrix can be retrieved and holds at least one
    stored value are exported; a key whose lookup raises ``ValueError`` is skipped.
    If no modality qualifies, no file is written.

    If data contains only one such modality, files are generated as follows:
        * ``output_name.scp.basis.coords.txt``, where ``basis`` is for each key in ``adata.obsm`` field.
        * ``output_name.scp.metadata.txt``.
        * Gene expression files:
            * If in sparse format:
                * ``output_name.scp.features.tsv``, information on genes;
                * ``output_name.scp.barcodes.tsv``, information on cell barcodes;
                * ``output_name.scp.matrix.mtx``, count matrix.
            * If not in sparse:
                * ``output_name.scp.expr.txt``.

    Otherwise, under the directory os.path.dirname(output_name), we will create separate folders per key and each separate folder has its own files in the format above.

    Examples
    --------
    >>> io.write_scp_file(data, output_name = "scp_result")
    """
    output_name = os.path.abspath(output_name)

    valid_keys = []
    for key in data.list_data():
        try:
            mat = data.get_data(key).get_matrix("X")
        except ValueError:
            # Deliberate best effort: modalities without a usable "X" matrix
            # are simply not exported.
            continue
        if mat.data.size > 0:
            valid_keys.append(key)

    if len(valid_keys) == 1:
        unidata = data.get_data(valid_keys[0])
        _write_scp_metadata(unidata, output_name, precision = precision)
        _write_scp_coords(unidata, output_name, precision = precision)
        _write_scp_expression(unidata, output_name, is_sparse, precision = precision)
    else:
        path = os.path.dirname(output_name)
        fname = os.path.basename(output_name)
        for key in valid_keys:
            subpath = os.path.join(path, key)
            # The original called os.path.mkdir, which does not exist and
            # raised AttributeError whenever the folder was missing.
            os.makedirs(subpath, exist_ok = True)
            out_new_name = os.path.join(subpath, fname)

            unidata = data.get_data(key)
            _write_scp_metadata(unidata, out_new_name, precision = precision)
            _write_scp_coords(unidata, out_new_name, precision = precision)
            _write_scp_expression(unidata, out_new_name, is_sparse, precision = precision)

    logger.info("write_scp_file is done.")