def UpdateData(self):
    """Refresh the cached event/duration series and the feature matrix.

    Reads the event and duration columns (named by ``self.event_name`` and
    ``self.duration_name``) into ``self.event`` and ``self.duration``, then
    stores the remaining columns as ``self._array`` together with the row
    labels (``self._case_name``) and column labels (``self._feature_name``).

    Raises:
        ValueError: if the event column does not contain exactly two
            distinct values.
    """
    # NOTE(review): the return value is discarded. If to_long_format returns
    # a new frame instead of mutating its argument, this call has no effect
    # on self._df -- confirm against the helper's implementation.
    to_long_format(self._df, self.event_name)

    self.event = self._df[self.event_name]
    self.duration = self._df[self.duration_name]

    if np.unique(self.event.to_numpy()).size != 2:
        message = 'Key \'{}\' must have only two values'.format(
            self.event_name)
        mylog.error(message)
        # Carry the reason in the exception too, so callers that catch it
        # do not have to consult the log to learn what went wrong.
        raise ValueError(message)

    # NOTE(review): this reads self.df while the lines above read self._df;
    # presumably a property over the same frame -- confirm.
    new_df = self.df.drop(columns=[self.event_name, self.duration_name],
                          inplace=False)
    self._array = new_df.values
    self._case_name = list(new_df.index)
    self._feature_name = list(new_df.columns)
def seed_df(self):
    """Return the two-row seed frame after ``utils.to_long_format(frame, "T")``.

    The frame has columns ``id``, ``var1``, ``T`` and ``E``.
    """
    subjects = [
        dict(id=1, var1=0.1, T=10, E=1),
        dict(id=2, var1=0.5, T=12, E=0),
    ]
    wide = pd.DataFrame.from_records(subjects)
    return utils.to_long_format(wide, "T")
def seed_df(self):
    """Build a two-record frame and pass it to ``utils.to_long_format`` with 'T'.

    Each record carries the fields ``id``, ``var1``, ``T`` and ``E``.
    """
    fields = ('id', 'var1', 'T', 'E')
    values = [
        (1, 0.1, 10, 1),
        (2, 0.5, 12, 0),
    ]
    # Pair every value tuple with the field names to get one dict per record.
    records = [dict(zip(fields, row)) for row in values]
    seed = pd.DataFrame.from_records(records)
    return utils.to_long_format(seed, 'T')
def seed_df(self):
    """Create the small seed frame and return ``utils.to_long_format(frame, 'T')``.

    Two records are used, each with keys ``id``, ``var1``, ``T`` and ``E``.
    """
    first = {'id': 1, 'var1': 0.1, 'T': 10, 'E': 1}
    second = {'id': 2, 'var1': 0.5, 'T': 12, 'E': 0}
    frame = pd.DataFrame.from_records([first, second])
    return utils.to_long_format(frame, 'T')
df = pd.DataFrame([ [1, 3, True, 1], [6, 4, False, 0], [3, 5, True, 1], [2, 5, False, 1], [4, 6, True, 1], [7, 7, True, 0], [8, 8, False, 0], [5, 8, False, 1], [9, 9, True, 0], [10, 10, True, 0], ], columns=['id', 'time', 'event', 'group']) df = to_long_format(df, 'time') cv = pd.DataFrame.from_records([ {"id": 1, "z": 0, "time": 0}, {"id": 6, "z": 1, "time": 0}, {"id": 3, "z": 1, "time": 0}, {"id": 2, "z": 0, "time": 0}, {"id": 4, "z": 0, "time": 0}, {"id": 7, "z": 0, "time": 0}, {"id": 8, "z": 0, "time": 0}, {"id": 5, "z": 0, "time": 0}, {"id": 9, "z": 0, "time": 0}, {"id": 10, "z": 0, "time": 0}, {"id": 1, "z": 0, "time": 3}, {"id": 6, "z": 1, "time": 3},
df = pd.DataFrame([ [1, 3, True, 1], [6, 4, False, 0], [3, 5, True, 1], [2, 5, False, 1], [4, 6, True, 1], [7, 7, True, 0], [8, 8, False, 0], [5, 8, False, 1], [9, 9, True, 0], [10, 10, True, 0], ], columns=['id', 'time', 'event', 'group']) df = to_long_format(df, 'time') cv = pd.DataFrame.from_records([ { "id": 1, "z": 0, "time": 0 }, { "id": 6, "z": 1, "time": 0 }, { "id": 3, "z": 1,
[1, 3, True, 1], [6, 4, False, 0], [3, 5, True, 1], [2, 5, False, 1], [4, 6, True, 1], [7, 7, True, 0], [8, 8, False, 0], [5, 8, False, 1], [9, 9, True, 0], [10, 10, True, 0], ], columns=["id", "time", "event", "group"], ) df = to_long_format(df, "time") cv = pd.DataFrame.from_records( [ {"id": 1, "z": 0, "time": 0}, {"id": 6, "z": 1, "time": 0}, {"id": 3, "z": 1, "time": 0}, {"id": 2, "z": 0, "time": 0}, {"id": 4, "z": 0, "time": 0}, {"id": 7, "z": 0, "time": 0}, {"id": 8, "z": 0, "time": 0}, {"id": 5, "z": 0, "time": 0}, {"id": 9, "z": 0, "time": 0}, {"id": 10, "z": 0, "time": 0}, {"id": 1, "z": 0, "time": 3}, {"id": 6, "z": 1, "time": 3},
# -*- coding: utf-8 -*-
if __name__ == "__main__":
    # Timing script: fits a CoxTimeVaryingFitter on an enlarged copy of the
    # Rossi dataset and prints how long the fit took, then the model summary.
    import time

    import pandas as pd

    from lifelines import CoxTimeVaryingFitter
    from lifelines.datasets import load_rossi
    from lifelines.utils import to_long_format

    # Stack 20 copies of the dataset to make the fit take measurable time.
    df = load_rossi()
    df = pd.concat([df] * 20)
    # reset_index() exposes the previous index as a column; the fit below
    # refers to it as "index" (assumes the loaded frame's index is unnamed).
    df = df.reset_index()
    df = to_long_format(df, duration_col="week")

    ctv = CoxTimeVaryingFitter()

    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments as with time.time().
    start_time = time.perf_counter()
    ctv.fit(df, id_col="index", event_col="arrest", start_col="start", stop_col="stop")
    time_took = time.perf_counter() - start_time

    print("--- %s seconds ---" % time_took)
    ctv.print_summary()
# Merge fund_cv DataFrame # df = df.merge(fund_cv.drop(columns='name'), on='id', how='inner') # Reduce to just companies for which we have relevant information df = df[df.id.isin(fund_cv.id.unique())] # SMALL SUBSET TEST CODE: # dr = df[(df.name == 'Twitter') | (df.name == 'Facebook')] # lr = to_long_format(dr, duration_col='age_at_exit') # rr = fund_cv.loc[(fund_cv.name == 'Twitter') | (fund_cv.name == 'Facebook'), # ['id', 'raised_amount_usd', 'time_to_funding']] # lr = add_covariate_to_timeline(lr, rr, 'id', 'time_to_funding', 'success', cumulative_sum=True) #------------------------------------------------------------------------------ # Prepare DataFrame for lifelines analysis #------------------------------------------------------------------------------ lf = to_long_format(df, 'age_at_exit') # Piped version (possibly faster?) # lf = df.pipe(to_long_format, 'age_at_exit')\ # .pipe(add_covariate_to_timeline(fund_cv_amt, # 'id', 'time_to_funding', 'success', # cumulative_sum=False)\ # NOTE WARNING THIS LINE IS SUPER F*****G SLOW. # Add raised_amount_usd as time-varying covariate fund_cv_amt = fund_cv[['id', 'raised_amount_usd', 'time_to_funding']] lf = add_covariate_to_timeline(lf, fund_cv_amt, 'id', 'time_to_funding', 'success',
[ [1, 3, True, 1], [6, 4, False, 0], [3, 5, True, 1], [2, 5, False, 1], [4, 6, True, 1], [7, 7, True, 0], [8, 8, False, 0], [5, 8, False, 1], [9, 9, True, 0], [10, 10, True, 0], ], columns=["id", "time", "event", "group"], ) df = to_long_format(df, "time") cv = pd.DataFrame.from_records([ { "id": 1, "z": 0, "time": 0 }, { "id": 6, "z": 1, "time": 0 }, { "id": 3, "z": 1,