def guess_signal_name(self, keyword, signal_type=None):
    """
    Search the signals of every file for names that contain `keyword`.

    :param keyword: text looked for (with `in`) inside each signal name.
    :param signal_type: if set, only that signal type is searched. Otherwise
                        every signal type listed for each visit is searched.
    :return: Counter mapping each matching signal name to the number of
             times it was found across files, visits and signal types.

    Files without visits, and files that raise OSError/KeyError while being
    read, are logged and recorded in `self.report`; they never abort the
    search.
    """
    proposed_sigs = Counter()
    print("Could it be...?")
    for file in log_progress(self.files):
        file_path = os.path.join(self.path, file)
        try:
            with HD5File(file_path) as hd5:
                visits = hd5.visits
                if not visits:
                    logging.warning(f"File {file} has no visits")
                    self.report.add_row(
                        is_readable=True,
                        sources=hd5.sources,
                        file=file,
                    )
                for visit in visits:
                    # Signal types are looked up per visit, the same way
                    # `get_quality` queries them.
                    if signal_type:
                        possible_types = [signal_type]
                    else:
                        possible_types = hd5.signal_types(visit)
                    for sig_type in possible_types:
                        if not hd5.has_sig_type(sig_type, visit=visit):
                            continue
                        # Use the type being iterated, not the (possibly
                        # None) `signal_type` argument.
                        full_path = HD5File.get_full_type_path(
                            sig_type,
                            visit,
                        )
                        for sig in hd5[full_path]:
                            if keyword in sig:
                                proposed_sigs[sig] += 1
                                logging.info(f"Found new signal!: {sig}")
        except (OSError, KeyError):
            logging.warning(f"File {file} could not be read")
            self.report.add_row(is_readable=False)
    return proposed_sigs
def plot_file_signals(self, file_name, visit, signals, max_len=None):
    """
    Draw the requested signals of one file and visit on a single figure.

    :param file_name: name of the file, relative to `self.path`.
    :param visit: visit whose signals are read.
    :param signals: iterable with the names of the signals to plot; it is
                    also used as the legend.
    :param max_len: forwarded to `get_signal` as `max_length` to crop the
                    signals.
    """
    time_divisor = 3600  # presumably seconds -> hours; confirm time units
    with HD5File(os.path.join(self.path, file_name)) as hd5:
        for signal_name in signals:
            trace = hd5.get_signal(signal_name, visit, max_length=max_len)
            # Make the time axis relative to the first sample of the signal.
            start = trace.time[0]
            trace.time = (trace.time - start) / time_divisor
            plt.plot(trace.time, trace.values)
    plt.legend(signals)
    plt.show()
def count_incomplete_signals(self, report, max_len=None):
    """
    Count, per signal, how many (file, visit) pairs of `report` hold a
    recording that ends before the expected duration.

    A signal is counted as incomplete when its last sample, measured from its
    first sample and divided by 3600, falls more than 0.5 before `max_len`.

    :param report: object exposing `files_and_visits` (pairs of file name and
                   visit) and `signals` (names of the signals to check).
    :param max_len: expected duration, also forwarded to `get_signal` as
                    `max_length`. It is required: the default of None is kept
                    only so the signature stays unchanged.
    :return: Counter mapping signal name to the number of incomplete
             recordings.
    :raises TypeError: if `max_len` is None, since completeness cannot be
                       judged without a reference duration.
    """
    if max_len is None:
        # Fail before opening any file instead of crashing later on
        # `None - 0.5` with an unrelated-looking message.
        raise TypeError(
            "max_len must be a number: it is the duration every signal is "
            "compared against",
        )
    tolerance = 0.5
    counter = Counter()
    for file, visit in log_progress(report.files_and_visits):
        file_path = os.path.join(self.path, file)
        with HD5File(file_path) as hd5:
            for signal in report.signals:
                s = hd5.get_signal(signal, visit, max_length=max_len)
                # Time relative to the first sample of the signal.
                s.time = (s.time - s.time[0]) / 3600
                if s.time[-1] < max_len - tolerance:
                    counter[s.name] += 1
    return counter
def extract_data(self, report):
    """
    Build a Bundle with one patient per (file, visit) pair of `report`.

    Each file is opened and `extract_patient` is called with the report's
    `current_signals`, `department` and `time_studied`. Falsy results are
    skipped, and files that raise OSError are logged and ignored. Patients
    are keyed by `patient.name`, so a later patient with the same name
    replaces an earlier one.

    :param report: object exposing `files_and_visits`, `current_signals`,
                   `department` and `time_studied`.
    :return: Bundle built from the extracted patients.
    """
    extracted = {}
    for file, visit in log_progress(report.files_and_visits):
        full_path = os.path.join(self.path, file)
        logging.info(f"\t file: {full_path}")
        try:
            with HD5File(full_path) as hd5:
                candidate = hd5.extract_patient(
                    visit,
                    signals=report.current_signals,
                    department=report.department,
                    max_length=report.time_studied,
                )
                if not candidate:
                    continue
                extracted[candidate.name] = candidate
        except OSError:
            logging.warning(f"File {file} is invalid!")
    return Bundle(extracted)
def plot_signal_trajectory(self, signal, report=None, max_len=None):
    """
    Plot a given signal through all the targeted files.

    If a report is given (RequestReport), only its (file, visit) pairs are
    used, ordered by file name. Otherwise every visit of every file in the
    directory that contains the signal is used.

    :param signal: name of the signal to plot.
    :param report: optional report exposing `files_and_visits`.
    :param max_len: forwarded to `get_signal` as `max_length` to crop the
                    signals.
    """
    if report:
        # `sorted` returns a new list. `list.sort()` returns None, and it
        # would also reorder the report's own list.
        files_and_visits = sorted(
            report.files_and_visits,
            key=lambda pair: pair[0],
        )
        targets = [(file, [visit]) for file, visit in files_and_visits]
    else:
        # None means "use every visit found in the file".
        targets = [(file, None) for file in self.files]

    for file, visits in log_progress(targets):
        file_path = os.path.join(self.path, file)
        with HD5File(file_path) as hd5:
            if visits is None:
                visits = hd5.visits
            for visit in visits:
                if hd5.has_signal(signal, visit):
                    s = hd5.get_signal(signal, visit, max_length=max_len)
                    # Time relative to the first sample of the signal.
                    s.time = (s.time - s.time[0]) / 3600
                    plt.plot(s.time, s.values)
    plt.show()
def get_quality(self):
    """
    Fill `self.report` with one row per file, visit and signal type.

    For every visit, the BLK08 column is True/False depending on whether a
    BLK08 department duration is found, or "-" when the visit has no "edw"
    source. Files without visits get a single row, and files that raise
    OSError get a row flagged as not readable.

    :return: `self.report`, after the rows have been added.
    """
    for file in log_progress(self.files):
        full_path = os.path.join(self.path, file)
        try:
            with HD5File(full_path) as hd5:
                for visit in hd5.visits:
                    has_edw = hd5.has_source("edw", visit)
                    if not has_edw:
                        blk08 = "-"
                    else:
                        stays = hd5.get_department_duration(
                            department="BLK08",
                            visit=visit,
                            only_first=True,
                        )
                        blk08 = bool(stays)
                    for sig_type in hd5.signal_types(visit):
                        self.report.add_row(
                            True,
                            hd5.sources,
                            sig_type,
                            file,
                            visit,
                            blk08,
                        )
                if not hd5.visits:
                    logging.warning(f"File {file} has no visits")
                    self.report.add_row(
                        is_readable=True,
                        sources=hd5.sources,
                        file=file,
                    )
        except OSError:
            logging.warning(f"File {file} could not be read")
            self.report.add_row(is_readable=False)
    return self.report
def find(self, signals=None, department=None, stay_length=0):
    """
    Create a table with the overlap time of the input signals during the
    BLK08 stay of the patient. If stay length is set, the considered time
    will be the period between entry to BLK08 and the next <stay_length>
    hours.

    Note:
    * EDW data is needed to get the BLK08 stay of the patient, so any hd5
      file without EDW data will be ignored.
    * Patients that haven't gone through BLK08 or whose BLK08 data is missing
      (admittance or discharge from BLK08 is unknown) are ignored.
    * If a patient has gone through BLK08 multiple times, only the first
      BLK08 stay will be used.

    :param signals: list with the signals to be searched. It may instead be
                    a list of signal types ("vitals", "waveform",
                    "flowsheet", "labs"), in which case every tmap of those
                    types is used. If empty or None, every defined tmap of
                    the four types is used.
    :param department: department where the signal will be looked at. If
                       None, the whole signal will be taken.
    :param stay_length: length of the stay (in hours) to consider the
                        signal. If set to 0 it will take the whole BLK08
                        stay.
    :return: RequestReport built from a dict keyed by FILE_ID(file, visit).
             Each value holds the studied period and, per piece of
             information returned by `find_signal`, a dict keyed by signal.
    """
    timeseries_types = ["vitals", "waveform", "flowsheet", "labs"]
    tmap_signals = []
    for stype in timeseries_types:
        tmap_signals.extend(DEFINED_TMAPS[stype])

    if not signals:
        signals = tmap_signals
    elif all(sig in timeseries_types for sig in signals):
        logging.info(
            "Signal types instead of individual signals detected. "
            "Will take all the tmaps for those types.",
        )
        type_signals = []
        for stype in signals:
            type_signals.extend(DEFINED_TMAPS[stype])
        signals = type_signals
    else:
        logging.info("Individual signal detected")
        known_signals = set(tmap_signals)
        not_valid_signals = set(signals) - known_signals
        if not_valid_signals:
            logging.warning(
                f"Signals {not_valid_signals} don't have an "
                f"associated tmap. They won't be used",
            )
        # Keep the caller's order and drop duplicates, so the signals are
        # always processed in the same order.
        signals = [
            sig for sig in dict.fromkeys(signals) if sig in known_signals
        ]

    results = {}
    for file in log_progress(self.files, desc="Finding files..."):
        file_path = os.path.join(self.path, file)
        logging.info(f"\t file: {file_path}")
        try:
            with HD5File(file_path) as hd5:
                for visit_id in hd5.visits:
                    file_id = FILE_ID(file, visit_id)
                    if not hd5.has_source("edw", visit=visit_id):
                        logging.info(
                            f"CSN {visit_id} of MRN {hd5.mrn} "
                            f"does not have edw data. Ignorning it",
                        )
                        continue

                    dpmt_stays = hd5.get_department_duration(
                        visit_id,
                        department=department,
                        max_len=stay_length,
                        only_first=True,
                        asunix=False,
                    )
                    if not dpmt_stays:
                        continue

                    # Only the first stay is studied.
                    dpmt_stay = dpmt_stays[0]
                    dpmt_duration = dpmt_stay["end"] - dpmt_stay["start"]
                    file_info = {
                        # Duration of the studied period, in hours.
                        "_period_studied": (
                            dpmt_duration.total_seconds() / 3600
                        ),
                        "_period_studied_start": (
                            dpmt_stay["start"].timestamp()
                        ),
                        "_period_studied_end": dpmt_stay["end"].timestamp(),
                    }

                    for signal in signals:
                        signal_info = hd5.find_signal(
                            signal,
                            department=department,
                            visit=visit_id,
                            max_length=stay_length,
                        )
                        # Regroup the result as {info: {signal: value}}.
                        for info in signal_info:
                            if info not in file_info:
                                file_info[info] = {}
                            file_info[info][signal] = signal_info[info]
                    results[file_id] = file_info
        except OSError:
            logging.warning(f"File {file} is invalid!")

    request_report = RequestReport(results, department=department)
    return request_report