示例#1
0
def test_logger():
    """Check that ``get_logger`` hands back a Logger and warns once on a bad level."""
    import pytest
    from logging import Logger
    from mspypeline.helpers import get_logger

    # a level given by name and a level given as a number both yield a Logger
    named_level_logger = get_logger("test", "warning")
    assert isinstance(named_level_logger, Logger)
    numeric_level_logger = get_logger(loglevel=10)
    assert isinstance(numeric_level_logger, Logger)

    # an unknown level name still yields a Logger, accompanied by one RuntimeWarning
    with pytest.warns(RuntimeWarning) as record:
        invalid_level_logger = get_logger(loglevel="asd")
        assert isinstance(invalid_level_logger, Logger)
        assert len(record) == 1
示例#2
0
    def __init__(self,
                 input_scale: str = "log2",
                 output_scale: str = "normal",
                 col_name_prefix: Optional[str] = None,
                 loglevel: int = logging.DEBUG,
                 **kwargs):
        """
        Abstract base class for Normalizers. Derived normalizers should implement the :meth:`fit` and :meth:`transform`.

        Parameters
        ----------
        input_scale
            Scale of the input data. Either normal or log2
        output_scale
            Scale of the output data. Either normal or log2
        col_name_prefix
            If not None the prefix is added to each column name
        loglevel
            loglevel of the logger
        kwargs
            accepted but not used by this constructor

        Raises
        ------
        ValueError
            If ``input_scale`` or ``output_scale`` is not one of "log2" or "normal".
        """
        # loglevel is kept on the instance in addition to being passed to the logger
        self.loglevel = loglevel
        self.logger = get_logger(self.__class__.__name__, self.loglevel)

        allowed_scales = ("log2", "normal")
        # validate both scales the same way so the message always names the real parameter
        for param_name, scale in (("input_scale", input_scale),
                                  ("output_scale", output_scale)):
            if scale not in allowed_scales:
                raise ValueError(param_name + " should be one of: " +
                                 ", ".join(allowed_scales))

        self.input_scale = input_scale
        self.output_scale = output_scale
        self.col_name_prefix = col_name_prefix
示例#3
0
    def __init__(self,
                 dir_: str,
                 file_path_yml: Optional[str] = None,
                 loglevel=logging.DEBUG):
        """
        Create the logger and the yaml handler, reset all state and apply the given paths.

        Parameters
        ----------
        dir_
            value assigned to ``start_dir``
        file_path_yml
            if not None, value assigned to ``file_path_yaml``
        loglevel
            level passed to the logger
        """
        self.logger = get_logger(self.__class__.__name__, loglevel=loglevel)

        # configure the yaml handler before storing it on the instance
        yaml_handler = YAML()
        yaml_handler.indent(offset=2)
        yaml_handler.default_flow_style = False
        yaml_handler.width = 4096
        self.yaml = yaml_handler

        # state that changes whenever the starting dir changes
        self.configs = {}
        self.reader_data = {}
        self.interesting_proteins = None
        self.go_analysis_gene_names = None

        # private fields for the start_dir / file_path_yaml properties
        self._start_dir = None
        self._file_path_yaml = None

        # everything above has to exist before the paths are assigned
        self.start_dir = dir_
        if file_path_yml is not None:
            self.file_path_yaml = file_path_yml
示例#4
0
    def __init__(self, start_dir: str, reader_config: dict, loglevel: int = logging.DEBUG):
        """
        Store the reader settings, create the logger and log the received configuration.

        Parameters
        ----------
        start_dir
            location where the directory/txt folder to the data can be found.
        reader_config
            mapping of the file reader configuration (as e.g. given in the config.yml file)
        loglevel
            level of the logger

        Raises
        ------
        ValueError
            if ``start_dir`` is None. The check runs last, after the log messages were emitted.
        """
        self.full_data: DataDict = DataDict(data_source=self)
        self.start_dir: str = start_dir
        self.reader_config: dict = reader_config
        self.logger: Logger = get_logger(self.__class__.__name__, loglevel)
        log = self.logger

        # report which files this reader asks for
        log.info("Required files: %s", self.required_files)

        # report the configuration, or warn when there is none
        if reader_config:
            log.debug("Got configs: %s", self.reader_config)
        else:
            log.warning("Empty configs")

        if start_dir is None:
            raise ValueError("Invalid starting dir")
示例#5
0
    def __init__(self,
                 start_dir: str,
                 reader_config: dict,
                 loglevel=logging.DEBUG):
        """
        Keep the reader settings, set up logging and log the given configuration.

        Parameters
        ----------
        start_dir
            starting directory of the reader; must not be None
        reader_config
            configuration mapping for the reader; an empty one only triggers a warning
        loglevel
            level passed to the logger

        Raises
        ------
        ValueError
            if ``start_dir`` is None (checked after the log messages were written)
        """
        self.full_data = DataDict(data_source=self)
        self.start_dir = start_dir
        self.reader_config = reader_config
        self.logger = get_logger(self.__class__.__name__, loglevel)
        log = self.logger

        # announce the files this reader needs
        log.info("Required files: %s", self.required_files)

        has_config = bool(reader_config)
        if has_config:
            log.debug("Got configs: %s", self.reader_config)
        else:
            log.warning("Empty configs")

        if start_dir is None:
            raise ValueError("Invalid starting dir")
示例#6
0
    def __init__(self,
                 path: str,
                 file_path_yml: Optional[str] = None,
                 loglevel=logging.DEBUG):
        """
        Create the logger and the yaml handler, reset all state and apply the given paths.

        Parameters
        ----------
        path
            location where the directory/txt folder to the data can be found.
        file_path_yml
            path to the yaml config file
        loglevel
            level of the logger
        """
        self.logger = get_logger(self.__class__.__name__, loglevel=loglevel)

        # configure the yaml handler before storing it on the instance
        yaml_handler = YAML()
        yaml_handler.indent(mapping=2, sequence=4, offset=2)
        yaml_handler.default_flow_style = False
        yaml_handler.width = 4096
        self.yaml = yaml_handler

        # state that changes whenever the starting dir changes
        #: configurations for the run. also saved configurations for the reader under the respective reader name
        self.configs = {}
        self.reader_data = {}
        self.interesting_proteins = None
        self.go_analysis_gene_names = None

        # private fields for the start_dir / file_path_yaml properties
        self._start_dir = None
        self._file_path_yaml = None

        # all selectable terms, custom and provided
        self.list_full_gos = []
        self.list_full_pathways = []

        # everything above has to exist before the paths are assigned
        self.start_dir = path
        if file_path_yml is not None:
            self.file_path_yaml = file_path_yml
示例#7
0
 def __setstate__(self, state):
     """Restore the instance dict from ``state`` and create a fresh logger from ``self.loglevel``."""
     self.__dict__ = state
     # the logger is always rebuilt here, whatever ``state`` contained under "logger"
     owner_name = self.__class__.__name__
     self.logger = get_logger(owner_name, self.loglevel)
示例#8
0
from typing import Optional, Dict, Tuple, Iterator, Union, Iterable, Sized, Callable
import pandas as pd
from collections import defaultdict as ddict
from itertools import combinations
from collections import deque
import numpy as np
from matplotlib.lines import Line2D
import os
from mspypeline.helpers import get_logger

# module-level logger named after this file: its base name up to the first "."
logger = get_logger(os.path.basename(__file__).partition(".")[0])


def get_number_rows_cols_for_fig(obj: Union[int, Sized]) -> Tuple[int, int]:
    """
    Find a near-square grid with at least as many cells as requested.

    Parameters
    ----------
    obj
        number of cells needed, or a sized object whose length is used

    Returns
    -------
    tuple
        (n_rows, n_cols), grown alternately starting with the rows, so that
        n_rows is either equal to n_cols or larger by one
    """
    needed = len(obj) if isinstance(obj, Sized) else obj
    rows = cols = 0
    while rows * cols < needed:
        # add a column only when the rows are already ahead, otherwise add a row
        if rows > cols:
            cols += 1
        else:
            rows += 1
    return rows, cols


def fill_dict(d: dict, s: str, s_split=None):
    if s_split is None:
        s_split = s.split("_")
    if len(s_split) > 1:
        fill_dict(d[s_split[0]], s, s_split[1:])
    else:
示例#9
0
    def __init__(self,
                 file_dir,
                 yml_file=None,
                 loglevel=logging.DEBUG,
                 configs: dict = None):
        """
        Build the main window with all of its widgets and enter the main loop.

        Parameters
        ----------
        file_dir
            location where the directory/txt folder to the data can be found.
        yml_file
            path to the yaml config file
        loglevel
            level of the logger
        configs
            mapping containing the configurations. If None or empty, the
            configurations of the initializer are left as they are.

        Notes
        -----
        The constructor ends by calling ``self.mainloop()``, so it does not
        return before that loop exits.
        """
        super().__init__()
        self.yaml_options = ["default"]
        self.reader_options = {
            reader.name: reader
            for reader in BaseReader.__subclasses__()
        }
        self.selected_reader = MQReader.MQReader
        self.normalize_options = ["None"] + list(default_normalizers.keys())
        self.mspinit = MSPInitializer(file_dir, yml_file, loglevel=loglevel)
        self.logger = get_logger(self.__class__.__name__, loglevel=loglevel)

        # counts the rows used so far below the fixed heading rows
        self.number_of_plots = 0

        self.plot_settings = {}
        # the normalized variants (lfq/raw/ibaq "_normalized_log2") are not listed here
        self.intensity_options = ["lfq_log2", "raw_log2", "ibaq_log2"]

        self.title("mspypeline")

        heading_font = "Helvetica 10 bold"

        # NOTE: grid() returns None, so widgets that are never referenced again
        # are placed directly without binding the result to a name.
        tk.Label(self, text="Dir to analyze",
                 font=heading_font).grid(row=0, column=0)
        tk.Label(self, text="Yaml file",
                 font=heading_font).grid(row=0, column=1)
        tk.Label(self, text="File reader",
                 font=heading_font).grid(row=0, column=2)

        self.dir_text = tk.StringVar(value=file_dir)
        dir_button = tk.Button(
            self,
            textvariable=self.dir_text,
            command=lambda: browsefunc(
                filedialog.askdirectory,
                self.dir_text,
                fn_params={
                    "title":
                    "Please select a directory with MaxQuant result files"
                }))
        dir_button.grid(row=1, column=0)

        self.yaml_text = tk.StringVar()
        self.yaml_button = tk.OptionMenu(self, self.yaml_text,
                                         *self.yaml_options)
        self.yaml_button.grid(row=1, column=1)

        self.reader_text = tk.StringVar(value="mqreader")
        self.reader_button = tk.OptionMenu(self, self.reader_text,
                                           *self.reader_options.keys())
        self.reader_button.grid(row=1, column=2)

        self.replicate_var = tk.IntVar(value=1)
        tk.Checkbutton(
            self,
            text="Does the file have technical replicates?",
            variable=self.replicate_var).grid(row=2, column=0)

        tk.Label(self, text="Go analysis proteins").grid(row=3, column=0)
        tk.Label(self, text="Pathway analysis").grid(row=3, column=1)
        tk.Label(self, text="Replicate names").grid(row=3, column=2)

        # width of the longest entry; default=0 keeps an empty list from raising
        go_terms = self.mspinit.list_full_gos
        self.go_term_list = tk.Listbox(self,
                                       selectmode="multiple",
                                       height=5,
                                       width=max(map(len, go_terms),
                                                 default=0))
        self.go_term_list.configure(exportselection=False)
        for go_term in go_terms:
            self.go_term_list.insert("end", go_term)
        self.go_term_list.grid(row=4, column=0)

        pathways = self.mspinit.list_full_pathways
        self.pathway_list = tk.Listbox(self,
                                       selectmode="multiple",
                                       height=5,
                                       width=max(map(len, pathways),
                                                 default=0))
        self.pathway_list.configure(exportselection=False)
        for pathway in pathways:
            self.pathway_list.insert("end", pathway)
        self.pathway_list.grid(row=4, column=1)

        self.experiments_list = tk.Listbox(self, height=5)
        self.experiments_list.grid(row=4, column=2)

        report_button = tk.Button(self,
                                  text="Create Report",
                                  command=lambda: self.report_button())
        report_button.grid(row=5, column=0)

        tk.Label(self, text="Which plots should be created").grid(row=6,
                                                                  column=0)
        tk.Label(self, text="Intensities").grid(row=6, column=1)
        tk.Label(self, text="Levels").grid(row=6, column=2)

        # number of rows used above the first plot section
        self.heading_length = 7

        tk.Label(self, text="Normalization plots",
                 font=heading_font).grid(row=self.heading_length +
                                         self.number_of_plots,
                                         column=0)
        self.number_of_plots += 1
        self.plot_row("Normalization overview",
                      "normalization_overview_all_normalizers")
        self.plot_row("Heatmap overview", "heatmap_overview_all_normalizers")

        tk.Label(self,
                 text="Choose a Normalization Method:",
                 font=heading_font).grid(row=self.heading_length +
                                         self.number_of_plots,
                                         column=1)
        self.number_of_plots += 1
        self.plot_intermediate_row("Choose a Normalization Method")

        tk.Label(self,
                 text="Outlier detection / Comparisons",
                 font=heading_font).grid(row=self.heading_length +
                                         self.number_of_plots,
                                         column=0)
        self.number_of_plots += 1
        self.plot_row("Detection counts", "detection_counts")
        self.plot_row("Number of detected proteins",
                      "detected_proteins_per_replicate")
        self.plot_row("Venn diagrams", "venn_results")
        self.plot_row("Group diagrams", "venn_groups")
        self.plot_row("PCA overview", "pca_overview")
        self.plot_row("Intensity histogram", "intensity_histograms")
        self.plot_row("Relative std", "relative_std")
        self.plot_row("Scatter replicates", "scatter_replicates")
        self.plot_row("Experiment comparison", "experiment_comparison")
        self.plot_row("Rank", "rank")

        tk.Label(self, text="Statistical inference",
                 font=heading_font).grid(row=self.heading_length +
                                         self.number_of_plots,
                                         column=0)
        self.number_of_plots += 1
        self.plot_row("Pathway Analysis", "pathway_analysis")
        # the pathway timecourse row is currently disabled:
        # self.plot_row("Pathway Timecourse", "pathway_timecourse")
        self.plot_row("Go analysis", "go_analysis")
        self.plot_row("Volcano plot (R)", "r_volcano")
        self.p_val_var = tk.IntVar(value=1)
        tk.Checkbutton(
            self, text="Use adjusted p value", variable=self.p_val_var).grid(
                row=self.heading_length + self.number_of_plots, column=1)

        total_length = self.heading_length + self.number_of_plots

        update_button = tk.Button(self,
                                  text="Update",
                                  command=lambda: self.update_button())
        update_button.grid(row=total_length + 1, column=1)

        start_button = tk.Button(self,
                                 text="Start",
                                 command=lambda: self.start_button())
        start_button.grid(row=total_length + 1, column=2)

        self.running_text = tk.StringVar(value="Please press Start")
        # keep the widget itself; chaining .grid() would store None instead
        self.running_label = tk.Label(self, textvariable=self.running_text)
        self.running_label.grid(row=total_length + 2, column=2)

        # add all tracing to the variables
        self.dir_text.trace("w", self.dir_setter)
        self.yaml_text.trace("w", self.yaml_path_setter)
        self.reader_text.trace("w", self.reader_setter)
        # make the GUI resizable
        self.columnconfigure(0, weight=1)
        self.rowconfigure(0, weight=1)

        # configs defaults to None, which dict.update() would reject
        if configs:
            self.mspinit.configs.update(configs)
        self.update_yaml_options()
        self.mainloop()