def restrict_ward(self, wards: List[str]) -> None:
    """Restrict the data to the given wards.

    Rows selected by the conjunction of ``CURRENT_WARD != ward`` queries
    (one per entry of ``wards``) are dropped. Afterwards, in both the post
    ward and the pre ward column, every value selected by the conjunction of
    ``!= EXTERNAL`` and ``!= ward`` queries is overwritten with ``INTERNAL``.

    :param wards: The wards to keep.
    """
    # Drop every row whose current ward differs from all requested wards.
    not_requested = [
        column_query(CURRENT_WARD, name, "!=") for name in wards
    ]
    outside = self.data.query(and_query(*not_requested)).index
    self.data = self.data.drop(index=outside)

    # Neighbouring wards that are neither external nor requested are
    # relabelled; the post ward column is handled first, then the pre ward.
    for column in (POST_WARD, PRE_WARD):
        conditions = [column_query(column, EXTERNAL, "!=")]
        conditions.extend(
            column_query(column, name, "!=") for name in wards)
        foreign = self.data.query(and_query(*conditions)).index
        self.data.loc[foreign, column] = INTERNAL
def clean_data(self) -> None:
    """Clean the data from multiple entries regarding the same stay.

    This should reflect the special needs of the data sheets obtained from
    the hospital.

    ``self.backup()`` is called first. Then, on a copy of ``self.data`` with
    the patient and post ward columns reset to NaN, the rows are walked
    positionally: all rows sharing birth, sex, global begin/end, FA begin and
    diagnosis number with the current row are treated as one patient,
    numbered ``i + 1``, passed to ``_clean_patient_data`` and collected. The
    collected frames, sorted by index, replace ``self.data``.
    """
    self.backup()

    data = self.data.copy()
    data.loc[:, PATIENT] = float("NaN")
    data.loc[:, POST_WARD] = float("NaN")

    # Together these columns should identify exactly one patient.
    key_columns = (BIRTH, SEX, GLOB_BEGIN, GLOB_END, FA_BEGIN, DIAGNR)

    # Collect the per-patient frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulator on every call.
    cleaned_frames = []
    i = 0
    while i < data.shape[0]:
        rowi = data.iloc[i, :]
        qry = and_query(
            *[column_query(column, rowi.loc[column])
              for column in key_columns])

        # Copy so that the assignment below writes to an independent frame
        # rather than to a selection of ``data``.
        patient_data = data.query(qry).copy()
        patient_data.loc[:, PATIENT] = i + 1

        # Keep only the first matching row in ``data`` so that position i
        # stays valid and the other rows are not visited again.
        data = data.drop(index=patient_data.iloc[1:, :].index)

        cleaned_frames.append(self._clean_patient_data(patient_data))
        i += 1

    if cleaned_frames:
        df = pd.concat(cleaned_frames)
        # Same column layout as appending to an empty frame built from
        # ``data.columns``: those columns first, any additional ones after.
        extra = [
            column for column in df.columns if column not in data.columns
        ]
        df = df.reindex(columns=list(data.columns) + extra)
    else:
        df = pd.DataFrame(columns=data.columns)

    # Sorting by date would be cleaner, but the time columns are not
    # formatted yet at this point, so only the index order is restored.
    self.data = df.sort_index()
def clean_data_gen(
    self
) -> Generator[Tuple[int, pd.DataFrame, pd.DataFrame], None, None]:
    """Step through the cleaning performed by clean_data, one patient at a time.

    Works on a copy of ``self.data`` whose patient and post ward columns are
    reset to NaN; ``self.data`` itself is not reassigned here.

    :yields: The current row position, all entries matching that row's
        patient key, and the frame returned by ``_clean_patient_data`` for
        those entries.
    """
    remaining = self.data.copy()
    remaining.loc[:, PATIENT] = float("NaN")
    remaining.loc[:, POST_WARD] = float("NaN")

    # Together these columns should identify exactly one patient.
    key_columns = (BIRTH, SEX, GLOB_BEGIN, GLOB_END, FA_BEGIN, DIAGNR)

    position = 0
    while position < remaining.shape[0]:
        reference = remaining.iloc[position]
        criteria = [
            column_query(column, reference[column]) for column in key_columns
        ]
        entries = remaining.query(and_query(*criteria))
        cleaned = self._clean_patient_data(entries)

        yield position, entries, cleaned

        # Only the first matching row stays, so the next position points at
        # a row that has not been handled yet.
        remaining = remaining.drop(index=entries.iloc[1:].index)
        position += 1
def inter_arrival_fit(self,
                      classes: Optional[List[int]] = None,
                      distributions: Optional[List[Callable[
                          [Union[List[float], np.ndarray, pd.Series]],
                          Union[Hypererlang, scipy.stats.expon]]]] = None,
                      filename="inter_arrival_fit") -> List[HospitalSpecs]:
    """Compute inter arrival fit distributions from data.

    For every origin (external, internal, and both combined), ward and class
    the inter arrival times are fitted with each callable in
    ``distributions``; the fits are plotted together and saved as a PDF in
    ``self.output_dir``. Fits for the external and internal origin are stored
    per distribution; the combined origin is only plotted, since the arrival
    arrays hold two origin slots.

    As a side effect ``self.hospital_specs`` receives the arrival fits of the
    first distribution.

    :param classes: The classes to include. If None, ``self.analyser.classes``
        is used when present, otherwise ``[0]``.
    :param distributions: Callables which return fitted distributions to data.
        Defaults to ``[fit_expon]``.
    :param filename: Filename prefix for the saved plots.

    :return: One copy of ``self.hospital_specs`` per distribution, each with
        the arrival fits of that distribution set.
    """
    if classes is None:
        if hasattr(self.analyser, "classes"):
            classes = self.analyser.classes
        else:
            classes = [0]

    if distributions is None:
        distributions = [fit_expon]

    # One (ward, class, origin) object array per distribution; entries
    # without data keep the initial 0.
    arrivals = [
        np.zeros((len(self.analyser.wards), len(classes), 2), dtype="O")
        for _ in range(len(distributions))
    ]

    for j, origin in enumerate([EXTERNAL, INTERNAL, [INTERNAL, EXTERNAL]]):
        # Only the two single origins have a slot in the arrival arrays.
        store_fit = j in (0, 1)
        for ward in self.analyser.wards:
            for i, class_ in enumerate(classes):
                qry = and_query(column_query(CURRENT_WARD, ward),
                                column_query(CURRENT_CLASS, class_))
                class_data = self.analyser.data.query(qry).dropna(
                    subset=[BEGIN, END])
                # NOTE(review): for the combined origin this passes a nested
                # list ([[INTERNAL, EXTERNAL]]) - confirm that
                # make_inter_arrival expects that.
                class_data["Arrival"] = self.analyser.make_inter_arrival(
                    class_data, pre_ward=[origin])
                if ward == "PACU":
                    class_data = drop_week_arrival(class_data, week=True)
                arrival_data = class_data["Arrival"].dropna()

                if arrival_data.empty:
                    # Nothing to fit or plot for this combination.
                    continue

                distribution_fits: List[Union[Hypererlang,
                                              scipy.stats.expon]] = []
                for k, distribution_ in enumerate(distributions):
                    distribution_fit = distribution_(arrival_data)
                    distribution_fits.append(distribution_fit)
                    if store_fit:
                        # Store the fit of *this* distribution; previously
                        # the first distribution's fit was written into
                        # every array.
                        arrivals[k][self.analyser.wards_map[ward], i,
                                    j] = distribution_fit

                title = f"ward: {ward}, class: {int(class_)}, origin: {origin}"
                plot_distribution_fit(arrival_data,
                                      distribution_fits,
                                      title=title)
                names = ", ".join([fit.name for fit in distribution_fits])
                filename_ = (
                    filename + f" - distributions[{names}] - ward[{ward}] - "
                    f"class[{int(class_)}] - origin[{origin}].pdf")
                plt.savefig(self.output_dir.joinpath(filename_))
                plt.close()

    self.hospital_specs.set_arrival(arrivals[0])

    hospital_specs = [
        self.hospital_specs.copy() for _ in range(len(distributions))
    ]
    for specs, arrival in zip(hospital_specs, arrivals):
        specs.set_arrival(arrival)

    return hospital_specs
def service_fit(
    self,
    classes: Optional[List[int]] = None,
    distributions: Optional[List[
        Callable[[Union[List[float], np.ndarray, pd.Series]],
                 Union[Hypererlang, scipy.stats.expon]]]] = None,
    filename="service_fit",
) -> List[HospitalSpecs]:
    """Fit service time distributions per ward and class.

    Every callable in ``distributions`` is applied to the service times of
    each ward/class combination. Each fit is plotted and saved on its own,
    then all fits of the combination are plotted together and saved again,
    both as PDFs in ``self.output_dir``.

    As a side effect ``self.hospital_specs`` receives the service fits of the
    first distribution.

    :param classes: The classes to include. If None, ``self.analyser.classes``
        is used when present, otherwise ``[0]``.
    :param distributions: Callables which return fitted distributions to data.
        Defaults to ``[fit_expon]``.
    :param filename: The filename used for plot saving.

    :return: One copy of ``self.hospital_specs`` per distribution, each with
        the service fits of that distribution set.
    """
    if classes is None:
        classes = (self.analyser.classes
                   if hasattr(self.analyser, "classes") else [0])
    if distributions is None:
        distributions = [fit_expon]

    # One (ward, class) object array per distribution; combinations without
    # data keep the initial 0.
    ward_count = len(self.analyser.wards)
    services = [
        np.zeros((ward_count, len(classes)), dtype="O") for _ in distributions
    ]

    self.analyser.make_service()
    self.logger.info("Modell for service.")

    for ward in self.analyser.wards:
        ward_row = self.analyser.wards_map[ward]
        for class_pos, class_ in enumerate(classes):
            selection = and_query(column_query(CURRENT_WARD, ward),
                                  column_query(CURRENT_CLASS, class_))
            service_data = self.analyser.data.query(selection)[SERVICE].dropna()
            if service_data.empty:
                # Nothing to fit or plot for this combination.
                continue

            fits: List[Union[Hypererlang, scipy.stats.expon]] = []
            for dist_pos, fit_function in enumerate(distributions):
                current_fit = fit_function(service_data)
                fits.append(current_fit)

                # Plot of this single fit.
                plot_distribution_fit(
                    service_data, [current_fit],
                    title=f"Ward: {ward}, Class: {int(class_)}")
                # NOTE(review): if ``filename`` has no replacement fields,
                # str.format returns it unchanged, so every single-fit plot
                # is written to the same path.
                single_name = filename.format(current_fit.name, ward,
                                              int(class_))
                plt.savefig(self.output_dir.joinpath(f"{single_name}.pdf"))
                plt.close()

                services[dist_pos][ward_row, class_pos] = current_fit

            # Plot of all fits for this ward/class together.
            plot_distribution_fit(service_data,
                                  fits,
                                  title=f"ward: {ward}, class: {int(class_)}")
            fit_names = ", ".join(fit.name for fit in fits)
            combined_name = (
                filename + f" - distributions[{fit_names}] - ward[{ward}] - "
                f"class[{int(class_)}].pdf")
            plt.savefig(self.output_dir.joinpath(combined_name))
            plt.close()

    self.hospital_specs.set_service(services[0])

    hospital_specs = [self.hospital_specs.copy() for _ in distributions]
    for specs, service in zip(hospital_specs, services):
        specs.set_service(service)
    return hospital_specs