示例#1
0
    def guess_signal_name(self, keyword, signal_type=None):
        """
        Suggest signal names that contain `keyword` across all the files.

        Every file in `self.files` is opened and, for each of its visits, the
        signals stored under each candidate signal type are scanned. Files
        without visits and files that cannot be read are also recorded in
        `self.report`.

        :param keyword: substring to look for in the signal names.
        :param signal_type: if set, only signals under this type are searched;
                            otherwise every signal type exposed by the file is.
        :return: Counter mapping each matching signal name to the number of
                 times it was found.
        """
        proposed_sigs = Counter()
        print("Could it be...?")

        for file in log_progress(self.files):
            file_path = os.path.join(self.path, file)
            try:
                with HD5File(file_path) as hd5:
                    visits = hd5.visits
                    if not visits:
                        logging.warning(f"File {file} has no visits")
                        self.report.add_row(
                            is_readable=True,
                            sources=hd5.sources,
                            file=file,
                        )

                    # NOTE(review): other code in this file calls
                    # `hd5.signal_types(visit)` as a method; confirm that the
                    # attribute form used here is valid for this HD5File.
                    possible_types = (
                        [signal_type] if signal_type else hd5.signal_types
                    )
                    for visit in visits:
                        for sig_type in possible_types:
                            if not hd5.has_sig_type(sig_type, visit=visit):
                                continue
                            # Build the path from the type being iterated, not
                            # from the `signal_type` argument, which is None
                            # whenever the caller did not restrict the search.
                            full_path = HD5File.get_full_type_path(
                                sig_type,
                                visit,
                            )
                            for sig in hd5[full_path]:
                                if keyword in sig:
                                    proposed_sigs[sig] += 1
                                    logging.info(
                                        f"Found new signal!: {sig}")
            except (OSError, KeyError):
                logging.warning(f"File {file} could not be read")
                self.report.add_row(is_readable=False)

        return proposed_sigs
示例#2
0
 def plot_file_signals(self, file_name, visit, signals, max_len=None):
     """
     Draw every requested signal of one file/visit on a single figure.

     Each signal's time axis is shifted so it starts at 0 and is divided by
     3600 (presumably seconds to hours -- confirm the unit of `time`).
     `max_len` is forwarded to `get_signal` as `max_length` to crop the
     signals. A legend with the signal names is added and the figure is shown.
     """
     with HD5File(os.path.join(self.path, file_name)) as hd5:
         for signal_name in signals:
             trace = hd5.get_signal(signal_name, visit, max_length=max_len)
             # Re-base the time axis on the first sample.
             first_sample = trace.time[0]
             trace.time = (trace.time - first_sample) / 3600
             plt.plot(trace.time, trace.values)
     plt.legend(signals)
     plt.show()
示例#3
0
 def count_incomplete_signals(self, report, max_len=None):
     """
     Count, per signal, how many file/visit pairs hold an incomplete signal.

     A signal is considered incomplete when its last time stamp, re-based on
     the first sample and divided by 3600 (presumably seconds to hours --
     confirm the unit of `time`), falls more than 0.5 short of `max_len`.

     :param report: object exposing `files_and_visits` (pairs of file name and
                    visit) and `signals` (the signal names to check).
     :param max_len: expected length of the signals, in the same unit as the
                     re-based time axis. It is also forwarded to `get_signal`
                     as `max_length`. Although it defaults to None, a number
                     is required.
     :return: Counter mapping signal name to the number of incomplete
              occurrences.
     :raises TypeError: if `max_len` is None.
     """
     if max_len is None:
         # Fail before opening any file: the completeness threshold below
         # cannot be computed without a reference length.
         raise TypeError(
             "max_len must be a number, not None: it is the reference "
             "length used to decide whether a signal is complete")

     # Tolerance of 0.5 on the expected length; computed once for all signals.
     min_duration = max_len - 0.5

     counter = Counter()
     for file, visit in log_progress(report.files_and_visits):
         file_path = os.path.join(self.path, file)
         with HD5File(file_path) as hd5:
             for signal in report.signals:
                 s = hd5.get_signal(signal, visit, max_length=max_len)
                 s.time = (s.time - s.time[0]) / 3600
                 if s.time[-1] < min_duration:
                     counter[s.name] += 1
     return counter
示例#4
0
    def extract_data(self, report):
        """
        Build a Bundle with the patients extracted from the report's files.

        For every (file, visit) pair in `report.files_and_visits` the file is
        opened and `extract_patient` is called with the report's current
        signals, department and studied time. Falsy results are skipped and
        files raising OSError are logged and ignored. Patients are keyed by
        `patient.name`, so a later patient with the same name replaces an
        earlier one.

        :param report: report providing `files_and_visits`, `current_signals`,
                       `department` and `time_studied`.
        :return: Bundle wrapping the dict of extracted patients.
        """
        extracted = {}
        for file, visit in log_progress(report.files_and_visits):
            file_path = os.path.join(self.path, file)
            logging.info(f"\t file: {file_path}")
            try:
                with HD5File(file_path) as hd5:
                    patient = hd5.extract_patient(
                        visit,
                        signals=report.current_signals,
                        department=report.department,
                        max_length=report.time_studied,
                    )
                # Nothing to store when the extraction yields no patient.
                if not patient:
                    continue
                extracted[patient.name] = patient
            except OSError:
                logging.warning(f"File {file} is invalid!")

        return Bundle(extracted)
示例#5
0
    def plot_signal_trajectory(self, signal, report=None, max_len=None):
        """
        Plot a given signal through all the targeted files.

        If a report is given (RequestReport), only its (file, visit) pairs are
        used, ordered by file name. Otherwise every visit of every file in
        `self.files` is considered. Visits that do not contain the signal are
        skipped.

        Each signal's time axis is shifted so it starts at 0 and is divided by
        3600 (presumably seconds to hours -- confirm the unit of `time`).

        :param signal: name of the signal to plot.
        :param report: optional report exposing `files_and_visits`.
        :param max_len: forwarded to `get_signal` as `max_length` to crop the
                        signal.
        """
        if report:
            # sorted() returns a new list. list.sort() returns None and would
            # also reorder the report's own list in place. Pairing each file
            # with its visit directly keeps an empty report working too.
            targets = [
                (file, [visit])
                for file, visit in sorted(
                    report.files_and_visits, key=lambda x: x[0])
            ]
        else:
            # None means "use every visit found in the file".
            targets = [(file, None) for file in self.files]

        for file, report_visits in log_progress(targets):
            file_path = os.path.join(self.path, file)
            with HD5File(file_path) as hd5:
                visits = hd5.visits if report_visits is None else report_visits
                for visit in visits:
                    if hd5.has_signal(signal, visit):
                        s = hd5.get_signal(signal, visit, max_length=max_len)
                        s.time = (s.time - s.time[0]) / 3600
                        plt.plot(s.time, s.values)
        plt.show()
示例#6
0
    def get_quality(self):
        """
        Creates a report with statistics from the HD5 files.

        For every readable file one row is added to `self.report` per visit and
        signal type. The last column is "-" when the visit has no "edw" source;
        otherwise it is whether a "BLK08" department duration was found. Files
        without visits get a single row, and files raising OSError get a row
        flagged as not readable.

        :return: `self.report` after all the rows have been added.
        """
        for file in log_progress(self.files):
            file_path = os.path.join(self.path, file)
            try:
                with HD5File(file_path) as hd5:
                    for visit in hd5.visits:
                        if not hd5.has_source("edw", visit):
                            # Without EDW data the BLK08 stay is unknown.
                            blk08 = "-"
                        else:
                            first_stay = hd5.get_department_duration(
                                department="BLK08",
                                visit=visit,
                                only_first=True,
                            )
                            blk08 = bool(first_stay)

                        for sig_type in hd5.signal_types(visit):
                            self.report.add_row(
                                True, hd5.sources, sig_type, file, visit, blk08)

                    if not hd5.visits:
                        logging.warning(f"File {file} has no visits")
                        self.report.add_row(
                            is_readable=True,
                            sources=hd5.sources,
                            file=file,
                        )
            except OSError:
                logging.warning(f"File {file} could not be read")
                self.report.add_row(is_readable=False)

        return self.report
示例#7
0
    def find(self, signals=None, department=None, stay_length=0):
        """
        Create a table with the overlap time of the input signals during the BLK08
        stay of the patient. If stay length is set, the considered time will be the
        period between entry to BLK08 and the next <stay_length> hours.

        Note:
        * EDW data is needed to get the BLK08 stay of the patient, so any hd5 file
          without EDW data will be ignored.
        * Patients that haven't gone through BLK08 or whose BLK08 data is missing
          (admittance or discharge from BLK08 is unknown) are ignored.
        * If a patient has gone through BLK08 multiple times, only the first BLK08 stay
          will be used.

        :param signals: list with the signals to be searched. It may instead be a
                        list of signal types ("vitals", "waveform", "flowsheet",
                        "labs"), in which case all the tmaps defined for those
                        types are used. If empty or None, all the defined tmaps
                        are used. Signals without an associated tmap are dropped
                        with a warning.
        :param department: department where the signal will be looked at. If None,
                           the whole signal will be taken.
        :param stay_length: length of the stay (in hours) to consider the signal.
                            If set to 0 it will take the whole BLK08 stay.
        :return: RequestReport built from a dict keyed by FILE_ID(file, visit).
                 Each value holds the studied period ("_period_studied" in
                 hours, "_period_studied_start" and "_period_studied_end" as
                 timestamps) plus, for every key returned by `find_signal`, a
                 dict mapping signal name to that key's value.
        """
        timeseries_types = ["vitals", "waveform", "flowsheet", "labs"]

        tmap_signals = []
        for stype in timeseries_types:
            tmap_signals.extend(DEFINED_TMAPS[stype])

        if signals:
            if all(sig in timeseries_types for sig in signals):
                # Adjacent string literals are concatenated as-is, so the
                # separating space has to be part of the first literal.
                logging.info(
                    "Signal types instead of individual signals detected. "
                    "Will take all the tmaps for those types.", )
                signals = [
                    tmap for stype in signals for tmap in DEFINED_TMAPS[stype]
                ]
            else:
                logging.info("Individual signal detected")
                known_signals = set(tmap_signals)
                not_valid_signals = set(signals) - known_signals
                if not_valid_signals:
                    logging.warning(
                        f"Signals {not_valid_signals} don't have an "
                        f"associated tmap. They won't be used", )
                # Keep the caller's order and drop duplicates; a plain set
                # intersection would make the signal order vary between runs.
                signals = [
                    sig for sig in dict.fromkeys(signals)
                    if sig in known_signals
                ]
        else:
            signals = tmap_signals

        results = {}
        for file in log_progress(self.files, desc="Finding files..."):
            file_path = os.path.join(self.path, file)
            logging.info(f"\t file: {file_path}")
            try:
                with HD5File(file_path) as hd5:
                    for visit_id in hd5.visits:

                        file_id = FILE_ID(file, visit_id)

                        if not hd5.has_source("edw", visit=visit_id):
                            logging.info(
                                f"CSN {visit_id} of MRN {hd5.mrn} "
                                f"does not have edw data. Ignorning it", )
                            continue

                        dpmt_stays = hd5.get_department_duration(
                            visit_id,
                            department=department,
                            max_len=stay_length,
                            only_first=True,
                            asunix=False,
                        )
                        if not dpmt_stays:
                            # No stay in the requested department: skip visit.
                            continue

                        file_info = {}

                        # Only the first stay is considered (only_first=True).
                        dpmt_stay = dpmt_stays[0]
                        dpmt_duration = dpmt_stay["end"] - dpmt_stay["start"]
                        # Duration of the studied period in hours.
                        dpmt_duration = dpmt_duration.total_seconds() / 3600

                        file_info["_period_studied"] = dpmt_duration
                        file_info["_period_studied_start"] = dpmt_stay[
                            "start"].timestamp()
                        file_info["_period_studied_end"] = dpmt_stay[
                            "end"].timestamp()

                        for signal in signals:
                            signal_info = hd5.find_signal(
                                signal,
                                department=department,
                                visit=visit_id,
                                max_length=stay_length,
                            )

                            # Regroup as file_info[<info key>][<signal>].
                            for info in signal_info:
                                if info not in file_info:
                                    file_info[info] = {}
                                file_info[info][signal] = signal_info[info]

                        results[file_id] = file_info

            except OSError:
                logging.warning(f"File {file} is invalid!")

        request_report = RequestReport(results, department=department)
        return request_report