# Exemplo n.º 1
# Produce some simulated survival data from a weird hazard function
import numpy
from samplers import HazardSampler

# Set a random seed and sample size for reproducibility
numpy.random.seed(1)
m = 1000


# Use this totally crazy hazard function (a def, not a lambda assignment — PEP 8)
def hazard(t):
    """Oscillating hazard rate h(t) = exp(sin(t) - 2.0), bounded in (0, e^-1]."""
    return numpy.exp(numpy.sin(t) - 2.0)


# Sample failure times from the hazard function
sampler = HazardSampler(hazard)
failure_times = numpy.array([sampler.draw() for _ in range(m)])

# Apply some non-informative right censoring, just to demonstrate how it's done
censor_times = numpy.random.uniform(0.0, 25.0, size=m)
y = numpy.minimum(failure_times, censor_times)
c = 1.0 * (censor_times > failure_times)  # 1.0 = event observed, 0.0 = censored

# Make some plots of the simulated data
from matplotlib import pyplot
from statsmodels.distributions import ECDF

# Plot a histogram of failure times from this hazard function
pyplot.hist(failure_times, bins=50)
pyplot.title('Uncensored Failure Times')
pyplot.savefig('uncensored_hist.png', transparent=True)
pyplot.show()

# Plot a histogram of censored failure times from this hazard function
# Exemplo n.º 2
import time
numpy.seterr(all='raise')  # fail fast on numerical problems instead of warning
numpy.random.seed(1)


# Sample size and cache filenames; the data set and fitted model are pickled
# so repeated runs are cheap.
m = 1000
data_filename = 'log_hazard_data' + str(m) + '.pickle'
modelfilename = 'log_hazard_model' + str(m) + '.pickle'
redo = False  # set True to force regeneration of the cached data set
if os.path.exists(data_filename) and not redo:
    # NOTE(review): pickle.load on an external file is unsafe for untrusted
    # input; acceptable here only because this script produced the file.
    # Binary mode ('rb') is required for pickle on Python 3 (was 'r').
    with open(data_filename, 'rb') as infile:
        m, y, c, censor_times, failure_times = pickle.load(infile)
else:
    censor_times = numpy.random.uniform(0.0, 100.0, size=m)

    # Oscillating baseline hazard h(t) = exp(sin(t) - 2.0) (def, not lambda — PEP 8)
    def baseline_hazard(t):
        return numpy.exp(numpy.sin(t) - 2.0)

    sampler = HazardSampler(baseline_hazard, 10.0, 20.0)
    failure_times = numpy.array([sampler.draw() for _ in range(m)])
    y = numpy.minimum(failure_times, censor_times)
    c = 1.0 * (censor_times > failure_times)  # 1.0 = event observed, 0.0 = censored
    # Binary mode ('wb') is required for pickle on Python 3 (was 'w').
    with open(data_filename, 'wb') as outfile:
        pickle.dump((m, y, c, censor_times, failure_times), outfile)
pyplot.hist(y, bins=50)
pyplot.show()
t0 = time.time()
model = GeneralizedRegressor(base_regressor=Earth(thresh=1e-7, max_terms=100, smooth=True, allow_linear=False, penalty=0), loss_function=MidpointLogHazardLossFunction(10))
model.fit(X=None, y=y, c=c)
with open(modelfilename, 'wb') as outfile:
    pickle.dump(model, outfile)
t1 = time.time()
# print() call form works on both Python 2 and 3 (original was a py2-only statement)
print('Total fitting time: %f seconds' % (t1 - t0))
t = numpy.arange(0.0, 30.0, .1)