def load_physio(feature_funcs):
    """Assemble the physiological prediction dataset as a Bunch.

    feature_funcs: sequence of callables ``f(values, times)`` evaluated per
    physiological variable to produce the feature columns.
    Returns a Bunch with ``data`` (samples x features), ``c_target``
    (mortality, for classification), ``r_target`` (length of stay in days,
    for regression) and ``DESCR``.
    """
    description = """ Loads a dataset containing physiological data. It returns a Bunch with the following attributes: - data: n_samples by n_features 2D array. There are 7890 samples (patient episodes), and 52 features (mean, standard deviation, max and min for each of 13 physiological variables) - c_target: target variable to be used for classification: mortality. 0 indicates survival, 1 indicates death. - r_target: target variable to be used for regression: length of stay in days. - DESCR: a description of this dataset (for now, the docstring of this function) """
    _headers, episodes = datafiles.load_episodes()
    _headers, outcomes = datafiles.load_outcomes()  # headers are not needed

    # One group of len(feature_funcs) columns per physiological variable.
    n_funcs = len(feature_funcs)
    n_samples = len(episodes)
    data = np.zeros((n_samples, n_funcs * datafiles.NUM_VARS))

    # Fill each row with the per-variable feature statistics.
    for row, (_episode_id, episode_data) in enumerate(episodes):
        for var in range(datafiles.NUM_VARS):
            lo = var * n_funcs
            data[row, lo:lo + n_funcs] = _compute_features(
                episode_data[var], feature_funcs)

    # Outcomes unpack into (length of stay, mortality, med-LOS) columns.
    _episode_ids, episode_outcomes = unzip(outcomes)
    los, mortality, _med_los = map(np.asarray, unzip(episode_outcomes))

    # los is in seconds; the regression target is whole days.
    return Bunch(data=data,
                 r_target=np.floor(los / (24 * 60 * 60)),
                 c_target=mortality,
                 DESCR=description)
# NOTE(review): `load_physio` is defined twice in this file; at import time
# this later definition shadows the earlier, near-identical one. Confirm
# which copy is intended and delete the other.
def load_physio(feature_funcs):
    # Human-readable dataset description, returned as the Bunch's DESCR field.
    description = """ Loads a dataset containing physiological data. It returns a Bunch with the following attributes: - data: n_samples by n_features 2D array. There are 7890 samples (patient episodes), and 52 features (mean, standard deviation, max and min for each of 13 physiological variables) - c_target: target variable to be used for classification: mortality. 0 indicates survival, 1 indicates death. - r_target: target variable to be used for regression: length of stay in days. - DESCR: a description of this dataset (for now, the docstring of this function) """
    headers, episodes = datafiles.load_episodes()
    headers, outcomes = datafiles.load_outcomes()  # don't care about headers
    # construct the prediction dataset
    num_samples = len(episodes)
    # len(feature_funcs) feature columns per physiological variable
    num_features = len(feature_funcs) * datafiles.NUM_VARS
    data = np.zeros((num_samples, num_features))
    # for each sample (episode)
    for i, (episode_id, episode_data) in enumerate(episodes):
        # compute its features (stats for each physiological variable)
        for v in range(datafiles.NUM_VARS):
            start = v * len(feature_funcs)
            end = (v + 1) * len(feature_funcs)
            data[i, start:end] = _compute_features(episode_data[v], feature_funcs)
    # obtain the target variables
    episode_ids, episode_outcomes = unzip(outcomes)
    los, mortality, med_los = map(np.asarray, unzip(episode_outcomes))
    # mortality, for classification
    c_target = mortality
    # length of stay, for regression
    r_target = np.floor(los / (24 * 60 * 60))  # convert from secs to days
    result = Bunch(data=data, r_target=r_target, c_target=c_target,
                   DESCR=description)
    return result
def _compute_features(measurements, functions):
    """Evaluate each feature function over one measurement series.

    ``measurements`` is unpacked into (times, values) pairs (per the unpack
    order below). Returns a 1-D array with one entry per function; all
    zeros when the series is empty.
    """
    out = np.zeros(len(functions))
    if not measurements:
        return out
    times, values = map(np.asarray, unzip(measurements))
    for idx, func in enumerate(functions):
        out[idx] = func(values, times)
    return out
    # (tail of a function whose `def` is above this chunk: slices the test
    # partition out of `data` and returns both partitions — TODO confirm
    # against the full file)
    testing_data = data[training_size:training_size + testing_size]
    return training_data, testing_data

# NOTE(review): this script also appears later in this file in a longer form
# (with an extra `training_data` slice and the progress-printer calls);
# likely a duplicated paste — confirm which copy is intended.
training_data, testing_data = get_mnist_data(60000, 10000)
evaluate, train, save = build_mnist_model()
printer = Printer(0.1)
encoder = OneHotEncoder(10)
iterations = 5
batch_sizes = 5
for itr in range(iterations):
    # Preparing the training data
    np.random.shuffle(training_data)
    inputs, targets = unzip(training_data)
    # Python 2 integer division here yields the number of batches.
    input_batches = np.array_split(np.asarray(inputs), len(inputs) / batch_sizes)
    target_batches = np.array_split(np.asarray(targets), len(targets) / batch_sizes)
    if itr == 0:
        print 'training with', len(input_batches), 'batches of size', len(
            input_batches[0])
    total_iteartions = len(input_batches) * iterations  # NOTE: spelled "iteartions" throughout
    for i, (input_batch, target_batch) in enumerate(zip(input_batches,
                                                        target_batches)):
        # One-hot encode the labels, then run one training step.
        target_batch = encoder.encode(target_batch)
        error = train(input_batch, target_batch, 4)
        current_iteration = itr * len(input_batches) + i
    # (tail of a function whose `def` is above this chunk: splits `data`
    # into train/test partitions and returns them — TODO confirm against
    # the full file)
    training_data = data[0:training_size]
    testing_data = data[training_size:training_size + testing_size]
    return training_data, testing_data

# Top-level training script: fetch MNIST, build the model, then train for a
# fixed number of epochs over shuffled mini-batches, printing progress.
training_data, testing_data = get_mnist_data(60000, 10000)
evaluate, train, save = build_mnist_model()
printer = Printer(0.1)
encoder = OneHotEncoder(10)
iterations = 5
batch_sizes = 5
for itr in range(iterations):
    # Preparing the training data
    np.random.shuffle(training_data)
    inputs, targets = unzip(training_data)
    # Python 2 integer division here yields the number of batches.
    input_batches = np.array_split(np.asarray(inputs), len(inputs) / batch_sizes)
    target_batches = np.array_split(np.asarray(targets), len(targets) / batch_sizes)
    if itr == 0:
        print 'training with', len(input_batches), 'batches of size', len(input_batches[0])
    total_iteartions = len(input_batches) * iterations  # NOTE: spelled "iteartions" throughout
    for i, (input_batch, target_batch) in enumerate(zip(input_batches,
                                                        target_batches)):
        # One-hot encode the labels, then run one training step.
        target_batch = encoder.encode(target_batch)
        error = train(input_batch, target_batch, 4)
        current_iteration = itr * len(input_batches) + i
        # Progress as integer percent of all batch-iterations.
        printer.overwrite('training ' + str(int(current_iteration * 100. /
                                                total_iteartions)) +
                          '% - error:' + str(error))
# NOTE(review): reconstructed placement — clearing the progress line after
# training finishes; confirm against the original (unmangled) indentation.
printer.clear()