Exemplo n.º 1
0
    def restrict_ward(self, wards: List[str]) -> None:
        """Keep only stays on the given wards and relabel foreign neighbours.

        Rows whose current ward is none of ``wards`` are removed from
        ``self.data``. In the remaining rows, every pre-/post-ward entry
        that is neither ``EXTERNAL`` nor one of ``wards`` is overwritten
        with ``INTERNAL``.

        :param wards: The wards to keep.
        """
        # Select rows whose current ward differs from every requested ward.
        not_requested = [
            column_query(CURRENT_WARD, name, "!=") for name in wards
        ]
        outside = self.data.query(and_query(*not_requested)).index
        self.data = self.data.drop(index=outside)

        # Same relabelling for both neighbour columns, post ward first.
        for column in (POST_WARD, PRE_WARD):
            conditions = [column_query(column, EXTERNAL, "!=")]
            conditions.extend(
                column_query(column, name, "!=") for name in wards)
            foreign = self.data.query(and_query(*conditions)).index
            self.data.loc[foreign, column] = INTERNAL
Exemplo n.º 2
0
    def clean_data(self) -> None:
        """Clean the data from multiple entries regarding the same stay.

        This should reflect the special needs for the data sheets
        obtained from the hospital.

        A backup is taken via ``self.backup()`` first. The data is then
        walked row by row: all rows sharing the identifying columns of
        the current row are grouped, tagged with a running patient
        number, passed through ``self._clean_patient_data`` and
        collected. The result, sorted by index, replaces ``self.data``.
        """

        self.backup()
        data = self.data.copy()

        data.loc[:, PATIENT] = float("NaN")
        data.loc[:, POST_WARD] = float("NaN")

        # Columns whose combination should identify exactly one patient.
        key_columns = (BIRTH, SEX, GLOB_BEGIN, GLOB_END, FA_BEGIN, DIAGNR)

        # Collect the cleaned pieces and concatenate once at the end;
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # accumulated frame on every iteration.
        cleaned_frames = []

        i = 0

        # `data` shrinks inside the loop, so the bound is re-evaluated on
        # every pass instead of iterating over a fixed range.
        while i < data.shape[0]:
            rowi = data.iloc[i, :]

            qry = and_query(
                *[column_query(col, rowi.loc[col]) for col in key_columns])

            # Explicit copy so the assignment below never targets a view
            # of `data` (and does not raise SettingWithCopyWarning).
            patient_data = data.query(qry).copy()

            patient_data.loc[:, PATIENT] = i + 1

            # Keep only the first matching row in `data`; the remaining
            # matches are handled here and must not be visited again.
            data = data.drop(index=patient_data.iloc[1:, :].index)

            # NOTE(review): assumes _clean_patient_data keeps the column
            # set of its input - confirm against its implementation.
            cleaned_frames.append(self._clean_patient_data(patient_data))

            i += 1

        if cleaned_frames:
            df = pd.concat(cleaned_frames)
        else:
            # pd.concat raises on an empty list; keep the column layout.
            df = pd.DataFrame(columns=data.columns)

        # now data is clean, work with it, self.data is still saved in csv!
        # it would be cleaner to sort by date, but this can be done later too
        # -> since time is not yet formatted, sorting by date is no good idea!
        self.data = df.sort_index()
Exemplo n.º 3
0
    def clean_data_gen(
            self
    ) -> Generator[Tuple[int, pd.DataFrame, pd.DataFrame], None, None]:
        """Step through the cleaning done by clean_data, one group at a time.

        Works on a copy of ``self.data``; ``self.data`` itself is not
        reassigned here.

        :yields: A triple of the current row position, all entries
        matching that row's identifying columns, and the DataFrame
        returned by _clean_patient_data for those entries.
        """

        remaining = self.data.copy()

        for column in (PATIENT, POST_WARD):
            remaining.loc[:, column] = float("NaN")

        # query parameters should reflect exactly one patient
        identifying = (BIRTH, SEX, GLOB_BEGIN, GLOB_END, FA_BEGIN, DIAGNR)

        position = 0

        # `remaining` shrinks while iterating, hence the explicit bound.
        while position < remaining.shape[0]:
            current = remaining.iloc[position]

            conditions = [
                column_query(column, current[column])
                for column in identifying
            ]
            raw_entries = remaining.query(and_query(*conditions))
            cleaned_entries = self._clean_patient_data(raw_entries)
            yield position, raw_entries, cleaned_entries

            # All matches except the first are dropped so they are not
            # visited again on a later pass.
            remaining = remaining.drop(index=raw_entries.iloc[1:].index)

            position += 1
Exemplo n.º 4
0
    def inter_arrival_fit(self,
                          classes: Optional[List[int]] = None,
                          distributions: Optional[List[Callable[
                              [Union[List[float], np.ndarray, pd.Series]],
                              Union[Hypererlang, scipy.stats.expon]]]] = None,
                          filename: str = "inter_arrival_fit") -> List[HospitalSpecs]:
        """Compute inter arrival fit distributions from data.

        For every origin (external, internal, and both combined), ward
        and class, the inter arrival times are fitted with each callable
        in ``distributions`` and a plot of the fits is saved to
        ``self.output_dir``. Fits for the external and internal origins
        are stored per distribution; the combined origin is plotted only.

        ``self.hospital_specs`` is updated with the arrivals of the first
        distribution.

        :param classes: The classes to include. If None, the classes of
        the analyser are used if it has any, otherwise ``[0]``.
        :param distributions: Callables which return fitted distributions
        to data. If None, ``[fit_expon]`` is used.
        :param filename: Filename prefix for the saved plots.

        :return: One copy of the hospital specs per distribution, each
        with the arrival array of that distribution set. The arrival
        arrays have shape (wards, classes, 2); entries without data
        stay 0.
        """

        if classes is None:
            if hasattr(self.analyser, "classes"):
                classes = self.analyser.classes
            else:
                classes = [0]

        if distributions is None:
            distributions = [fit_expon]

        # One object array per distribution; last axis: 0 = first origin
        # (EXTERNAL), 1 = second origin (INTERNAL).
        arrivals = [
            np.zeros((len(self.analyser.wards), len(classes), 2), dtype="O")
            for _ in range(len(distributions))
        ]

        for j, origin in enumerate([EXTERNAL, INTERNAL, [INTERNAL, EXTERNAL]]):
            # Only the two single-origin passes have a slot in the arrays.
            store_fit = j < 2
            for ward in self.analyser.wards:
                for i, class_ in enumerate(classes):
                    qry = and_query(column_query(CURRENT_WARD, ward),
                                    column_query(CURRENT_CLASS, class_))
                    class_data = self.analyser.data.query(qry).dropna(
                        subset=[BEGIN, END])
                    # NOTE(review): on the combined pass `origin` is
                    # already a list, so `pre_ward` becomes a nested list
                    # - confirm make_inter_arrival handles that.
                    class_data["Arrival"] = self.analyser.make_inter_arrival(
                        class_data, pre_ward=[origin])
                    if ward == "PACU":
                        class_data = drop_week_arrival(class_data, week=True)
                    arrival_data = class_data["Arrival"].dropna()
                    if arrival_data.empty:
                        continue

                    distribution_fits: List[Union[Hypererlang,
                                                  scipy.stats.expon]] = []
                    for k, distribution_ in enumerate(distributions):
                        distribution_fit = distribution_(arrival_data)
                        distribution_fits.append(distribution_fit)
                        if store_fit:
                            # Store the fit of distribution k; previously
                            # the first fit was written into every array.
                            arrivals[k][self.analyser.wards_map[ward], i,
                                        j] = distribution_fit

                    title = f"ward: {ward}, class: {int(class_)}, origin: {origin}"
                    plot_distribution_fit(arrival_data,
                                          distribution_fits,
                                          title=title)
                    names = ", ".join(fit.name for fit in distribution_fits)
                    filename_ = filename + f" - distributions[{names}] - ward[{ward}] - " \
                                           f"class[{int(class_)}] - origin[{origin}].pdf"
                    plt.savefig(self.output_dir.joinpath(filename_))
                    plt.close()

        self.hospital_specs.set_arrival(arrivals[0])
        hospital_specs = [
            self.hospital_specs.copy() for _ in range(len(distributions))
        ]
        for specs, arrival in zip(hospital_specs, arrivals):
            specs.set_arrival(arrival)

        return hospital_specs
Exemplo n.º 5
0
    def service_fit(
        self,
        classes: Optional[List[int]] = None,
        distributions: Optional[List[
            Callable[[Union[List[float], np.ndarray, pd.Series]],
                     Union[Hypererlang, scipy.stats.expon]]]] = None,
        filename: str = "service_fit",
    ) -> List[HospitalSpecs]:
        """Compute service fit distributions from data.

        For every ward and class, the service times are fitted with each
        callable in ``distributions``. One plot per single fit and one
        plot with all fits combined are saved to ``self.output_dir``.

        ``self.hospital_specs`` is updated with the services of the first
        distribution.

        :param classes: The classes to include. If None, the classes of
        the analyser are used if it has any, otherwise ``[0]``.
        :param distributions: Callables which return fitted distributions
        to data. If None, ``[fit_expon]`` is used.
        :param filename: The filename for plot saving. For the single-fit
        plots it is used as a ``str.format`` template receiving the
        distribution name, ward and class; if it contains no
        placeholders, a descriptive suffix is appended instead.

        :return: One copy of the hospital specs per distribution, each
        with the service array of that distribution set. The service
        arrays have shape (wards, classes); entries without data stay 0.
        """

        if classes is None:
            if hasattr(self.analyser, "classes"):
                classes = self.analyser.classes
            else:
                classes = [0]

        if distributions is None:
            distributions = [fit_expon]

        # One object array per distribution, indexed by (ward, class).
        services = [
            np.zeros((len(self.analyser.wards), len(classes)), dtype="O")
            for _ in range(len(distributions))
        ]

        self.analyser.make_service()

        self.logger.info("Modell for service.")

        for ward in self.analyser.wards:
            for i, class_ in enumerate(classes):
                qry = and_query(column_query(CURRENT_WARD, ward),
                                column_query(CURRENT_CLASS, class_))
                class_data = self.analyser.data.query(qry)
                service_data = class_data[SERVICE].dropna()
                if service_data.empty:
                    continue

                distribution_fits: List[Union[Hypererlang,
                                              scipy.stats.expon]] = []
                for j, distribution_ in enumerate(distributions):
                    distribution_fit = distribution_(service_data)
                    distribution_fits.append(distribution_fit)

                    title = f"Ward: {ward}, Class: {int(class_)}"
                    plot_distribution_fit(service_data, [distribution_fit],
                                          title=title)
                    filename_ = filename.format(distribution_fit.name,
                                                ward, int(class_))
                    if filename_ == filename:
                        # No placeholders in the template (e.g. the
                        # default): without a suffix every single-fit
                        # plot would overwrite the same file.
                        filename_ = (
                            f"{filename}"
                            f" - distribution[{distribution_fit.name}]"
                            f" - ward[{ward}] - class[{int(class_)}]")
                    plt.savefig(
                        self.output_dir.joinpath(f"{filename_}.pdf"))
                    plt.close()

                    services[j][self.analyser.wards_map[ward],
                                i] = distribution_fit

                # Combined plot showing all fits for this ward and class.
                title = f"ward: {ward}, class: {int(class_)}"
                plot_distribution_fit(service_data,
                                      distribution_fits,
                                      title=title)
                names = ", ".join(fit.name for fit in distribution_fits)
                filename_ = filename + f" - distributions[{names}] - ward[{ward}] - " \
                                       f"class[{int(class_)}].pdf"
                plt.savefig(self.output_dir.joinpath(filename_))
                plt.close()

        self.hospital_specs.set_service(services[0])
        hospital_specs = [
            self.hospital_specs.copy() for _ in range(len(distributions))
        ]
        for specs, service in zip(hospital_specs, services):
            specs.set_service(service)

        return hospital_specs