def test_is_duration_events(self):
    """Check ``Cohort.is_duration_events`` for three event configurations.

    * a cohort built with ``None`` as events -> False
    * events whose ``start`` and ``end`` are both set -> True
    * events whose ``end`` is ``None`` on every row -> False
    """
    event_schema = StructType(
        [
            StructField("patientID", IntegerType(), True),
            StructField("start", TimestampType(), True),
            StructField("end", TimestampType(), True),
        ]
    )
    subjects = self.spark.createDataFrame(pd.DataFrame({"patientID": [1, 2, 3]}))
    reference_date = datetime(1993, 10, 9)

    # Case 1: the cohort carries no events at all.
    cohort_without_events = Cohort("patients", "patients", subjects, None)
    self.assertFalse(cohort_without_events.is_duration_events())

    # Case 2: a single event with both boundaries defined.
    bounded_events = self.spark.createDataFrame(
        data=[(1, reference_date, reference_date)],
        schema=event_schema,
    )
    cohort_with_bounded_events = Cohort(
        "patients", "patients", subjects, bounded_events
    )
    self.assertTrue(cohort_with_bounded_events.is_duration_events())

    # Case 3: events that only have a start; the end column is entirely null.
    open_ended_events = self.spark.createDataFrame(
        data=[(1, reference_date, None), (2, reference_date, None)],
        schema=event_schema,
    )
    cohort_with_open_events = Cohort(
        "patients", "patients", subjects, open_ended_events
    )
    self.assertFalse(cohort_with_open_events.is_duration_events())
def plot_mean_duration_per_value(figure: Figure, cohort: Cohort) -> Figure:
    """Draw a horizontal bar chart of the mean event duration per ``value``.

    The cohort's events are aggregated with
    ``agg_by_col(..., "duration", "mean")`` grouped on ``value``; one bar is
    drawn per row of the result, ordered by ``value``.

    :param figure: figure whose current axes (``figure.gca()``) receive the plot.
    :param cohort: cohort whose ``events`` are aggregated; it must satisfy
        ``cohort.is_duration_events()``.
    :return: the same ``figure`` object, after drawing.
    :raises AssertionError: if ``cohort.is_duration_events()`` is falsy.
    """
    assert cohort.is_duration_events()

    # NOTE(review): the result is used like a pandas DataFrame (sort_values,
    # column access) -- confirm against agg_by_col.
    df = agg_by_col(
        cohort.events, frozenset(["value"]), "duration", "mean"
    ).sort_values("value")

    positions = range(len(df))
    ax = figure.gca()
    ax.barh(positions, df["avg(duration)"].values)
    # Fix the tick positions first, then label them: labels set while the
    # axis still has a non-fixed locator trigger a Matplotlib warning and are
    # not tied to explicit positions.
    ax.set_yticks(positions)
    ax.set_yticklabels(df["value"].values)
    return figure
def plot_duration_distribution_per_month_as_bar(
    figure: Figure, cohort: Cohort
) -> Figure:
    """Draw a bar chart of event counts per 30-unit duration bucket.

    Durations returned by ``event_duration_agg(cohort, "count")`` are bucketed
    with ``ceil(duration / 30)`` and the ``count(1)`` values are summed per
    bucket; one bar is drawn per bucket.

    :param figure: figure whose current axes (``figure.gca()``) receive the plot.
    :param cohort: cohort to aggregate; it must satisfy
        ``cohort.is_duration_events()``.
    :return: the same ``figure`` object, after drawing.
    :raises AssertionError: if ``cohort.is_duration_events()`` is falsy.
    """
    assert cohort.is_duration_events()

    df = event_duration_agg(cohort, "count").sort_values("duration")
    # Bucket index = ceil(duration / 30); presumably durations are in days so
    # a bucket approximates a month -- confirm against event_duration_agg.
    df["duration"] = np.ceil(df["duration"] / 30).astype("int32")
    df = df.groupby("duration").sum().reset_index()

    positions = range(len(df))
    ax = figure.gca()
    ax.bar(positions, df["count(1)"].values)
    # Fix the tick positions first, then label them: labels set while the
    # axis still has a non-fixed locator trigger a Matplotlib warning and are
    # not tied to explicit positions.
    ax.set_xticks(positions)
    ax.set_xticklabels(df["duration"].values)
    return figure
def plot_duration_distribution_per_day_as_line(
    figure: Figure, cohort: Cohort
) -> Figure:
    """Draw event counts against duration as a line on a log-scaled y axis.

    :param figure: figure whose current axes (``figure.gca()``) receive the plot.
    :param cohort: cohort to aggregate; it must satisfy
        ``cohort.is_duration_events()``.
    :return: the same ``figure`` object, after drawing.
    :raises AssertionError: if ``cohort.is_duration_events()`` is falsy.
    """
    assert cohort.is_duration_events()

    duration_counts = event_duration_agg(cohort, "count")
    duration_counts = duration_counts.sort_values("duration")

    axes = figure.gca()
    axes.plot(duration_counts["duration"], duration_counts["count(1)"])
    axes.set_yscale("log")

    # Major ticks every 365 x-units and minor ticks every 30, both with zero
    # offset (presumably days, i.e. roughly years and months -- confirm).
    x_axis = axes.xaxis
    x_axis.set_minor_locator(IndexLocator(30, +0.0))
    x_axis.set_major_locator(IndexLocator(365, +0.0))

    # Vertical grid lines are drawn on the major ticks only.
    axes.grid(True, which="major", axis="x")
    return figure