def smoothed_hazard_confidence_intervals_(self, bandwidth, hazard_=None):
    """
    Compute upper and lower confidence bounds for the kernel-smoothed hazard.

    Parameters:
      bandwidth: the bandwidth to use in the Epanechnikov kernel.
      hazard_: a computed (n,) numpy array of estimated hazard rates.
        If None, uses self.smoothed_hazard_(bandwidth).

    Returns:
      a DataFrame indexed by self.timeline with the columns
      "<name>_upper_<alpha>" and "<name>_lower_<alpha>", where <name> is
      "smoothed-" followed by the first column name of
      self.cumulative_hazard_ and <alpha> is self.alpha with two decimals.

    Note:
      the first entry of self._cumulative_sq is reset to 0 in place.
    """
    # Identity test on purpose: `hazard_ == None` compares element-wise when
    # hazard_ is a numpy array, and the result has no single truth value.
    if hazard_ is None:
        hazard_ = self.smoothed_hazard_(bandwidth).values[:, 0]

    timeline = self.timeline
    alpha2 = inv_normal_cdf(1 - (1 - self.alpha) / 2)
    name = "smoothed-" + self.cumulative_hazard_.columns[0]

    # Difference the cumulative series to recover per-time increments; the
    # first row has no predecessor, so it is filled with the (zeroed) head.
    self._cumulative_sq.iloc[0] = 0
    var_hazard_ = self._cumulative_sq.diff().fillna(self._cumulative_sq.iloc[0])
    C = var_hazard_.values != 0.0  # only consider the points with jumps

    kernel = epanechnikov_kernel(timeline[:, None], timeline[C][None, :], bandwidth)
    std_hazard_ = np.sqrt(
        1.0 / (2 * bandwidth ** 2) * np.dot(kernel ** 2, var_hazard_.values[C])
    )

    # Bounds are multiplicative: hazard * exp(+/- z * std / hazard).
    values = {
        "%s_upper_%.2f" % (name, self.alpha): hazard_ * np.exp(alpha2 * std_hazard_ / hazard_),
        "%s_lower_%.2f" % (name, self.alpha): hazard_ * np.exp(-alpha2 * std_hazard_ / hazard_),
    }
    return pd.DataFrame(values, index=timeline)
def smoothed_hazards_(self, bandwidth=1):
    """
    Smooth self.hazards_ with the Epanechnikov kernel.

    Parameters:
      bandwidth: the sigma/bandwidth handed to the kernel (default 1).

    Returns:
      a DataFrame with the same columns as self.hazards_, indexed by
      self.timeline, holding the kernel matrix multiplied by the hazards.
    """
    weights = epanechnikov_kernel(
        self.timeline[:, np.newaxis], self.timeline, bandwidth
    )
    smoothed = np.dot(weights, self.hazards_.values)
    return pd.DataFrame(
        smoothed,
        columns=self.hazards_.columns,
        index=self.timeline,
    )
def smoothed_hazards_(self, bandwith=1):
    """
    Smooth self.hazards_ with the Epanechnikov kernel, restricted to the
    rows selected by self.censorship.

    Parameters:
      bandwith: the sigma/bandwidth handed to the kernel (default 1).

    Returns:
      a DataFrame with the same columns as self.hazards_, indexed by
      self.timeline. Only timeline points where self.censorship (cast to
      bool) is true contribute to each smoothed value.
    """
    mask = self.censorship.astype(bool)
    weights = epanechnikov_kernel(
        self.timeline[:, np.newaxis], self.timeline[mask], bandwith
    )
    selected_hazards = self.hazards_.values[mask, :]
    return pd.DataFrame(
        np.dot(weights, selected_hazards),
        columns=self.hazards_.columns,
        index=self.timeline,
    )
def smoothed_hazards_(self, bandwidth=1):
    """
    Return the hazards smoothed by the Epanechnikov kernel.

    Parameters:
      bandwidth: the sigma/bandwidth passed to the kernel (default 1).

    Returns:
      a DataFrame indexed by self.timeline whose columns match
      self.hazards_; each row is a kernel-weighted sum of the hazards.
    """
    kernel_matrix = epanechnikov_kernel(
        self.timeline[:, np.newaxis], self.timeline, bandwidth
    )
    frame_kwargs = {
        "columns": self.hazards_.columns,
        "index": self.timeline,
    }
    return pd.DataFrame(np.dot(kernel_matrix, self.hazards_.values), **frame_kwargs)
def smoothed_hazard_(self, bandwidth):
    """
    Kernel-smooth the increments of self.cumulative_hazard_.

    Parameters:
      bandwidth: the bandwidth used in the Epanechnikov kernel.

    Returns:
      a single-column DataFrame indexed by self.timeline; the column is
      named "smoothed-" followed by the first column name of
      self.cumulative_hazard_.
    """
    timeline = self.timeline
    source_column = self.cumulative_hazard_.columns[0]

    # Increments of the cumulative hazard; the first row has no predecessor
    # and is filled from the first row of the cumulative hazard itself.
    first_row = self.cumulative_hazard_.iloc[0]
    increments = self.cumulative_hazard_.diff().fillna(first_row)

    # Keep only the time points where the increment is non-zero.
    has_jump = (increments[source_column] != 0.0).values

    weights = epanechnikov_kernel(
        timeline[:, np.newaxis], timeline[has_jump][np.newaxis, :], bandwidth
    )
    scale = 1.0 / (2 * bandwidth)
    smoothed = scale * np.dot(weights, increments.values[has_jump, :])

    return pd.DataFrame(
        smoothed,
        columns=["smoothed-" + source_column],
        index=timeline,
    )
def smoothed_hazard_(self, bandwidth):
    """
    Kernel-smooth the differenced cumulative hazard.

    Parameters:
      bandwidth: the bandwidth used in the Epanechnikov kernel.

    Returns:
      a DataFrame of the smoothed hazard, indexed by self.timeline, with one
      column named "differenced-" followed by the first column name of
      self.cumulative_hazard_.
    """
    times = self.timeline
    base_name = self.cumulative_hazard_.columns[0]
    out_columns = ["differenced-" + base_name]

    # Difference the cumulative hazard; the leading NaN is filled from the
    # first row of the cumulative hazard.
    jumps = self.cumulative_hazard_.diff().fillna(self.cumulative_hazard_.iloc[0])

    # Boolean selector for time points with a non-zero increment.
    nonzero = (jumps[base_name] != 0.0).values

    kernel_matrix = epanechnikov_kernel(
        times[:, np.newaxis], times[nonzero][np.newaxis, :], bandwidth
    )
    inverse_bandwidth = 1.0 / bandwidth
    smoothed_values = inverse_bandwidth * np.dot(kernel_matrix, jumps.values[nonzero, :])

    return pd.DataFrame(smoothed_values, columns=out_columns, index=times)
def smoothed_hazard_confidence_intervals_(self, bandwidth, hazard_=None):
    """
    Compute confidence bounds around the kernel-smoothed hazard.

    Parameters:
      bandwidth: the bandwidth to use in the Epanechnikov kernel.
      hazard_: a computed (n,) numpy array of estimated hazard rates.
        If None, uses self.smoothed_hazard_(bandwidth).

    Returns:
      a DataFrame indexed by self.timeline whose two columns are keyed by
      self.ci_labels[0] and self.ci_labels[1].

    Note:
      the first entry of self._cumulative_sq is reset to 0 in place.
    """
    if hazard_ is None:
        hazard_ = self.smoothed_hazard_(bandwidth).values[:, 0]

    times = self.timeline
    z_value = inv_normal_cdf(1 - (1 - self.alpha) / 2)

    # Recover per-time increments from the cumulative series; the first row
    # has no predecessor and is filled from the (zeroed) head entry.
    self._cumulative_sq.iloc[0] = 0
    increments = self._cumulative_sq.diff().fillna(self._cumulative_sq.iloc[0])

    # only consider the points with jumps
    has_jump = increments.values != 0.0

    weights = epanechnikov_kernel(
        times[:, np.newaxis], times[has_jump][np.newaxis, :], bandwidth
    )
    scale = 1.0 / (bandwidth ** 2)
    std_hazard_ = np.sqrt(scale * np.dot(weights ** 2, increments.values[has_jump]))

    # Bounds are multiplicative: hazard * exp(+/- z * std / hazard).
    exponent = z_value * std_hazard_ / hazard_
    bounds = {}
    bounds[self.ci_labels[0]] = hazard_ * np.exp(exponent)
    bounds[self.ci_labels[1]] = hazard_ * np.exp(-exponent)
    return pd.DataFrame(bounds, index=times)