def mpd_hiss(client, args):
    logging.info("Connecting to MPD...")
    client.connect(args.host, args.port)
    logging.debug("Connected.")

    if args.password is not None:
        try:
            logging.debug("Authenticating...")
            client.password(args.password)
            logging.debug("Authenticated.")
        except mpd.CommandError as e:
            raise AuthError(e)

    last_status = client.status()
    icon_cache = {
        'last_dir': None,
        'last_image': None,
        'default': growl_icon,
    }

    while True:
        client.send_idle("player")
        client.fetch_idle()
        status = client.status()

        started_playing = (last_status["state"] != "play"
                           and status["state"] == "play")
        last_songid = last_status.get("songid", None)
        songid = status.get("songid", None)
        track_changed = songid not in (None, last_songid)

        if started_playing or track_changed:
            song = client.currentsong()
            icon = album_art(icon_cache,
                             get_album_dir(song.get("file"), args.album_art),
                             args.scale_icons)
            song_data = {
                "artist": song.get("artist", "Unknown artist"),
                "title": (song.get("title")
                          or basename(song.get("file"))
                          or "Unknown track"),
                "album": song.get("album", ""),
                "duration": hms(int(song.get("time", 0)))
            }
            logging.info("Sending Now Playing notification for "
                         "{artist} - [{album}] {title}.".format(**song_data))
            description = args.description_format.format(**song_data)
            notify(title=args.title_format.format(**song_data),
                   description=description.rstrip("\n"),
                   icon=icon)

        last_status = status
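# The hms() helper used above (and throughout the training scripts below) is
# not defined in these fragments. A minimal sketch of what it is assumed to
# do -- format a duration in seconds as a human-readable h/m/s string:
def hms_sketch(seconds):
    seconds = int(seconds)
    hours, remainder = divmod(seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    if hours:
        return "%dh%02dm%02ds" % (hours, minutes, secs)
    if minutes:
        return "%dm%02ds" % (minutes, secs)
    return "%ds" % secs

print(hms_sketch(3725))  # 1h02m05s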
def main():
    def training(num_batches, batch_size, x_train, label_train, mask_train):
        for i in range(num_batches):
            idx = range(i * batch_size, (i + 1) * batch_size)
            x_batch = x_train[idx]
            y_batch = label_train[idx]
            mask_batch = mask_train[idx]
            loss, out, batch_norm = train(x_batch, y_batch, mask_batch)
            norms.append(batch_norm)
            preds.append(out)
            losses.append(loss)
        predictions = np.concatenate(preds, axis=0)
        loss_train = np.mean(losses)
        all_losses_train.append(loss_train)
        acc_train = utils.proteins_acc(predictions,
                                       label_train[0:num_batches * batch_size],
                                       mask_train[0:num_batches * batch_size])
        print('acc_train: ', acc_train)
        all_accuracy_train.append(acc_train)
        mean_norm = np.mean(norms)
        all_mean_norm.append(mean_norm)
        print " average training loss: %.5f" % loss_train
        print " average training accuracy: %.5f" % acc_train
        print " average norm: %.5f" % mean_norm

    def testing(num_batches, batch_size, X, y, mask):
        for i in range(num_batches):
            idx = range(i * batch_size, (i + 1) * batch_size)
            x_batch = X[idx]
            y_batch = y[idx]
            mask_batch = mask[idx]
            loss, out = evaluate(x_batch, y_batch, mask_batch)
            preds.append(out)
            losses.append(loss)
        predictions = np.concatenate(preds, axis=0)
        loss_eval = np.mean(losses)
        all_losses.append(loss_eval)
        acc_eval = utils.proteins_acc(predictions, y, mask)
        all_accuracy.append(acc_eval)
        print("Average evaluation loss ({}): {:.5f}".format(subset, loss_eval))
        print("Average evaluation accuracy ({}): {:.5f}".format(subset, acc_eval))
        return i

    global momentum_schedule, momentum, i

    sym_y = T.imatrix('target_output')
    sym_mask = T.matrix('mask')
    sym_x = T.tensor3()
    tol = 1e-5
    num_epochs = config.epochs
    batch_size = config.batch_size

    print("Building network ...")
    l_in, l_out = config.build_model()
    all_layers = las.layers.get_all_layers(l_out)
    num_params = las.layers.count_params(l_out)
    print("  number of parameters: %d" % num_params)
    print("  layer output shapes:")
    # output for debugging (names and dimensions), e.g. InputLayer (None, 700, 42)
    for layer in all_layers:
        name = string.ljust(layer.__class__.__name__, 32)
        print("    %s %s" % (name, las.layers.get_output_shape(layer)))

    print("Creating cost function")
    # lasagne.layers.get_output produces a variable for the output of the net
    out_train = las.layers.get_output(l_out, sym_x, deterministic=False)
    print('out_train: ', out_train)
    print("Creating eval function")
    out_eval = las.layers.get_output(l_out, sym_x, deterministic=True)
    probs_flat = out_train.reshape((-1, num_classes))
    print("probs_flat: ", probs_flat)

    lambda_reg = config.lambda_reg
    params = las.layers.get_all_params(l_out, regularizable=True)
    reg_term = sum(T.sum(p**2) for p in params)
    cost = T.nnet.categorical_crossentropy(T.clip(probs_flat, tol, 1 - tol),
                                           sym_y.flatten())
    print('cost: ', cost)
    # average over non-masked positions only, then add L2 regularization
    cost = T.sum(cost * sym_mask.flatten()) / T.sum(sym_mask) \
        + lambda_reg * reg_term
    print('cost_2: ', cost)

    # Retrieve all parameters from the network
    all_params = las.layers.get_all_params(l_out, trainable=True)

    # Compute SGD updates for training
    print("Computing updates ...")
    if hasattr(config, 'learning_rate_schedule'):
        learning_rate_schedule = config.learning_rate_schedule  # Import learning rate schedule
    # else:
    #     learning_rate_schedule = {0: config.learning_rate}
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))

    all_grads = T.grad(cost, all_params)
    cut_norm = config.cut_grad
    clipped_grads, norm_calc = las.updates.total_norm_constraint(
        all_grads, max_norm=cut_norm, return_norm=True)

    if optimizer == "rmsprop":
        updates = las.updates.rmsprop(clipped_grads, all_params, learning_rate)
    else:
        sys.exit("please choose <rmsprop> in configfile")

    # Theano functions for training and computing cost
    print "config.batch_size %d" % batch_size
    print "data.num_classes %d" % num_classes
    if hasattr(config, 'build_model'):
        print("has build model")
    print("Compiling train ...")
    # Use this for training (see deterministic = False above)
    train = theano.function([sym_x, sym_y, sym_mask],
                            [cost, out_train, norm_calc],
                            updates=updates)
    print("Compiling eval ...")
    # use this for eval (deterministic = True + no updates)
    evaluate = theano.function([sym_x, sym_y, sym_mask], [cost, out_eval])

    # Start timers
    start_time = time.time()
    prev_time = start_time

    all_losses_train = []
    all_accuracy_train = []
    all_losses_eval_train = []
    all_losses_eval_valid = []
    all_losses_eval_test = []
    all_accuracy_eval_train = []
    all_accuracy_eval_valid = []
    all_accuracy_eval_test = []
    all_mean_norm = []

    import Data_Manipulator
    x_train, x_valid, label_train, label_valid, mask_train, mask_valid, \
        num_seq_train = Data_Manipulator.get_train()
    # print("y shape")
    # print(label_valid.shape)
    # print("x_test shape")
    # print(x_valid.shape)

    # Start training
    for epoch in range(num_epochs):
        if (epoch % 10) == 0:
            print "Epoch %d of %d" % (epoch + 1, num_epochs)
        if epoch in learning_rate_schedule:
            lr = np.float32(learning_rate_schedule[epoch])
            print "  setting learning rate to %.7f" % lr
            learning_rate.set_value(lr)

        # print "Shuffling data"
        seq_names = np.arange(0, num_seq_train)
        np.random.shuffle(seq_names)
        x_train = x_train[seq_names]
        label_train = label_train[seq_names]
        mask_train = mask_train[seq_names]

        num_batches = num_seq_train // batch_size  # integer division
        losses = []
        preds = []
        norms = []
        training(num_batches, batch_size, x_train, label_train, mask_train)

        sets = [('valid', x_valid, label_valid, mask_valid,
                 all_losses_eval_valid, all_accuracy_eval_valid)]
        for subset, X, y, mask, all_losses, all_accuracy in sets:
            print "  validating: %s loss" % subset
            losses = []  # reset accumulators so training results do not leak into evaluation
            preds = []
            num_batches = np.size(X, axis=0) // config.batch_size
            testing(num_batches, batch_size, X, y, mask)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        # extrapolate linearly over the remaining epochs
        est_time_left = time_since_start * (num_epochs - (epoch + 1)) / float(epoch + 1)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)

        if (epoch >= config.start_saving_at) and ((epoch % config.save_every) == 0):
            print "  saving parameters and metadata"
            with open(metadata_path + "-%d.pkl" % epoch, 'w') as f:
                pickle.dump({
                    'config_name': config_name,
                    'param_values': las.layers.get_all_param_values(l_out),
                    'losses_train': all_losses_train,
                    'accuracy_train': all_accuracy_train,
                    'losses_eval_train': all_losses_eval_train,
                    'losses_eval_valid': all_losses_eval_valid,
                    'losses_eval_test': all_losses_eval_test,
                    'accuracy_eval_valid': all_accuracy_eval_valid,
                    'accuracy_eval_train': all_accuracy_eval_train,
                    'accuracy_eval_test': all_accuracy_eval_test,
                    'mean_norm': all_mean_norm,
                    'time_since_start': time_since_start
                }, f, pickle.HIGHEST_PROTOCOL)
            print "  stored in %s" % metadata_path
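# The cost above is a masked, L2-regularised cross-entropy: per-position
# losses are weighted by the mask and averaged over real (non-padded)
# positions only. A self-contained numpy sketch of the same computation, with
# invented toy values (not part of the original code):
import numpy as np

def masked_crossentropy_sketch(probs, targets, mask, tol=1e-5):
    # probs: (N, C) predicted distributions; targets: (N,) integer labels;
    # mask: (N,) 1.0 for real positions, 0.0 for padding.
    probs = np.clip(probs, tol, 1 - tol)
    nll = -np.log(probs[np.arange(len(targets)), targets])
    return np.sum(nll * mask) / np.sum(mask)

p = np.array([[0.7, 0.3], [0.4, 0.6], [0.5, 0.5]])
t = np.array([0, 1, 0])
m = np.array([1.0, 1.0, 0.0])  # the padded position does not affect the loss
print(masked_crossentropy_sketch(p, t, m))  # mean of -log(0.7) and -log(0.6)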
    res_df.metric_eval_valid.max(), res_df.metric_eval_valid.iloc[-1]
)
model_arch += '\nBEST/LAST ACC TRAIN: %.2f - %.2f.\n' % (
    res_df.acc_eval_train.max() * 100,
    res_df.acc_eval_train.iloc[-1] * 100
)
model_arch += 'BEST/LAST ACC VALID: %.2f - %.2f.\n' % (
    res_df.acc_eval_valid.max() * 100,
    res_df.acc_eval_valid.iloc[-1] * 100
)
model_arch += '\nTOTAL TRAINING TIME: %s' % hms(model_data['time_since_start'])
# print model_arch

train_conf_mat, hist_rater_a, hist_rater_b, train_nom, train_denom = \
    model_data['metric_extra_eval_train'][-1]
valid_conf_mat, hist_rater_a, hist_rater_b, valid_nom, valid_denom = \
    model_data['metric_extra_eval_valid'][-1]

# Normalised train confusion matrix (with argmax decoding).
# print train_conf_mat / train_conf_mat.sum()
# Normalised validation confusion matrix (with argmax decoding).
# print valid_conf_mat / valid_conf_mat.sum()

chunk_size = model_data['chunk_size'] * 2
batch_size = model_data['batch_size']
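# On the normalisation mentioned in the comments above: dividing a confusion
# matrix by its total turns raw counts into joint frequencies, while row
# normalisation gives per-class recall. Toy counts invented for illustration:
import numpy as np

conf_mat = np.array([[50., 10.],
                     [ 5., 35.]])
print(conf_mat / conf_mat.sum())                       # joint frequencies
print(conf_mat / conf_mat.sum(axis=1, keepdims=True))  # per-class recall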
for obj_idx, obj_name in enumerate(config().order_objectives):
    valid_mean = np.mean(tmp_losses_valid[obj_name])
    losses_eval_valid[obj_name] = valid_mean
    means.append(valid_mean)
    print obj_name, valid_mean
print 'Sum of mean losses:', sum(means)

now = time.time()
time_since_start = now - start_time
time_since_prev = now - prev_time
prev_time = now
est_time_left = time_since_start * (config().max_nchunks - chunk_idx + 1.) / (
    chunk_idx + 1. - start_chunk_idx)
eta = datetime.now() + timedelta(seconds=est_time_left)
eta_str = eta.strftime("%c")
print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
print

if ((chunk_idx + 1) % config().save_every) == 0:
    print
    print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks)
    print 'Saving metadata, parameters'
    with open(metadata_path, 'w') as f:
        pickle.dump({
            'configuration_file': config_name,
            'git_revision_hash': utils.get_git_revision_hash(),
            'experiment_id': expid,
            'chunks_since_start': chunk_idx,
            'losses_eval_train': losses_eval_train,
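# The ETA arithmetic above extrapolates linearly: elapsed time per processed
# chunk times the number of chunks remaining. A hedged, self-contained sketch
# of the same idea (names invented for the example):
def estimate_time_left(elapsed_s, chunks_done, chunks_total):
    # assumes future chunks cost roughly as much as past ones
    return elapsed_s * (chunks_total - chunks_done) / float(chunks_done)

print(estimate_time_left(120.0, 4, 20))  # 480.0 seconds to go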
    prefix_test=img_dir,
    transfo_params=transfos,
    paired_transfos=paired_transfos,
)

all_outputs = []
for i in range(tta_times):
    start_time = time.time()
    print("\t\t\tTTA %i OF %i...\n\n" % (i + 1, tta_times))
    pred = do_pred(test_gen)
    all_outputs.append(pred)
    time_since_start = time.time() - start_time
    print("\nOne TTA iteration took %s.\n" % hms(time_since_start))
    print("Estimated %s to go...\n\n" % hms((tta_times - (i + 1)) * time_since_start))

print("\n\nDone doing TTA predictions! Ensembling ...\n")

if tta_ensemble_method == 'mean':
    outputs = np.mean(np.asarray(all_outputs), axis=0)
elif tta_ensemble_method == 'log_mean':
    outputs = np.mean(np.log(1e-5 + np.asarray(all_outputs)), axis=0)

subm_fn = 'subm/' + "%s--%s[%s][%s][%i][%s].csv" % (
    model_data['model_id'], model_data['configuration'],
    dataset, tta_transfos, tta_times,
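# On the two ensembling modes above: 'mean' averages the TTA predictions
# directly, while 'log_mean' averages in log space (a geometric mean up to the
# 1e-5 shift that guards against log(0)). A minimal numpy illustration with
# invented predictions:
import numpy as np

tta_preds = np.asarray([[0.9, 0.1],
                        [0.5, 0.5]])
arith = np.mean(tta_preds, axis=0)                         # [0.7, 0.3]
geom = np.exp(np.mean(np.log(1e-5 + tta_preds), axis=0))   # ~[0.67, 0.22]
print(arith, geom)  # the geometric mean penalises disagreement more strongly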
def predict_slice_model(expid, outfile, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Build model"
    interface_layers = config().build_model()
    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    _check_slicemodel(input_layers)

    # Print the architecture
    _print_architecture(top_layer)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    idx = T.lscalar('idx')
    givens = dict()
    for key in input_layers.keys():
        if key == "sunny":
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().sunny_batch_size:(idx + 1) * config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().batch_size:(idx + 1) * config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    iter_test = theano.function([idx],
                                network_outputs + theano_printer.get_the_stuff_to_print(),
                                givens=givens,
                                on_unused_input="ignore",
                                # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                )

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])

    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))
    chunks_train_idcs = range(1, num_chunks + 1)

    create_test_gen = partial(config().create_test_gen,
                              required_input_keys=xs_shared.keys(),
                              required_output_keys=["patients", "slices"],
                              )

    print "Generate predictions with this model"
    start_time = time.time()
    prev_time = start_time

    predictions = [{
        "patient": i + 1,
        "slices": {
            slice_id: {
                "systole": np.zeros((0, 600)),
                "diastole": np.zeros((0, 600))
            }
            for slice_id in data_loader.get_slice_ids_for_patient(i + 1)
        }
    } for i in xrange(NUM_PATIENTS)]

    # Loop over data and generate predictions
    for e, test_data in izip(itertools.count(start=1),
                             buffering.buffered_gen_threaded(create_test_gen())):
        print "  load testing data onto GPU"
        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])

        patient_ids = test_data["output"]["patients"]
        slice_ids = test_data["output"]["slices"]
        print "  patients:", " ".join(map(str, patient_ids))
        print "  chunk %d/%d" % (e, num_chunks)

        for b in xrange(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {output_layers.keys()[i]: network_outputs[i]
                                    for i in xrange(len(output_layers))}
            kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict)
            kaggle_systoles = kaggle_systoles.astype('float64')
            kaggle_diastoles = kaggle_diastoles.astype('float64')
            for idx, (patient_id, slice_id) in enumerate(
                    zip(patient_ids[b * config().batch_size:(b + 1) * config().batch_size],
                        slice_ids[b * config().batch_size:(b + 1) * config().batch_size])):
                if patient_id != 0:
                    index = patient_id - 1
                    patient_data = predictions[index]
                    assert patient_id == patient_data["patient"]
                    patient_slice_data = patient_data["slices"][slice_id]
                    patient_slice_data["systole"] = np.concatenate(
                        (patient_slice_data["systole"], kaggle_systoles[idx:idx + 1, :]), axis=0)
                    patient_slice_data["diastole"] = np.concatenate(
                        (patient_slice_data["diastole"], kaggle_diastoles[idx:idx + 1, :]), axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) /
                                            float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    # Average predictions
    already_printed = False
    for prediction in predictions:
        for prediction_slice_id in prediction["slices"]:
            prediction_slice = prediction["slices"][prediction_slice_id]
            if prediction_slice["systole"].size > 0 and prediction_slice["diastole"].size > 0:
                average_method = getattr(config(), 'tta_average_method',
                                         partial(np.mean, axis=0))
                prediction_slice["systole_average"] = average_method(prediction_slice["systole"])
                prediction_slice["diastole_average"] = average_method(prediction_slice["diastole"])
                try:
                    test_if_valid_distribution(prediction_slice["systole_average"])
                    test_if_valid_distribution(prediction_slice["diastole_average"])
                except:
                    if not already_printed:
                        print "WARNING: These distributions are not distributions"
                        already_printed = True
                    prediction_slice["systole_average"] = \
                        make_monotone_distribution(prediction_slice["systole_average"])
                    prediction_slice["diastole_average"] = \
                        make_monotone_distribution(prediction_slice["diastole_average"])

    print "Calculating training and validation set scores for reference"
    # Add CRPS scores to the predictions; iterate over train and validation sets
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                  (train_patients_indices, "train")]:
        # Iterate over patients in the set
        for patient in patient_ids:
            prediction = predictions[patient - 1]
            # Iterate over the slices
            for slice_id in prediction["slices"]:
                prediction_slice = prediction["slices"][slice_id]
                if "systole_average" in prediction_slice:
                    assert patient == regular_labels[patient - 1, 0]
                    error_sys = CRSP(prediction_slice["systole_average"],
                                     regular_labels[patient - 1, 1])
                    prediction_slice["systole_CRPS"] = error_sys
                    prediction_slice["target_systole"] = regular_labels[patient - 1, 1]
                    error_dia = CRSP(prediction_slice["diastole_average"],
                                     regular_labels[patient - 1, 2])
                    prediction_slice["diastole_CRPS"] = error_dia
                    prediction_slice["target_diastole"] = regular_labels[patient - 1, 2]
                    prediction_slice["CRPS"] = 0.5 * error_sys + 0.5 * error_dia

    print "dumping prediction file to %s" % outfile
    with open(outfile, 'w') as f:
        pickle.dump({
            'metadata_path': metadata_path,
            'configuration_file': config().__name__,
            'git_revision_hash': utils.get_git_revision_hash(),
            'experiment_id': expid,
            'time_since_start': time_since_start,
            'param_values': lasagne.layers.get_all_param_values(top_layer),
            'predictions_per_slice': predictions,
        }, f, pickle.HIGHEST_PROTOCOL)
    print "prediction file dumped"

    return
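# The predictions above are 600-bin cumulative distributions; CRSP() scores
# them against a Heaviside step at the true volume, and
# make_monotone_distribution() repairs a vector that fails the CDF check.
# Neither helper is defined in this fragment; a plausible sketch of both,
# under those assumptions:
import numpy as np

def make_monotone_cdf_sketch(cdf):
    # clip to [0, 1] and enforce a non-decreasing sequence
    return np.maximum.accumulate(np.clip(cdf, 0.0, 1.0))

def crps_sketch(cdf, true_value):
    # mean squared difference between the predicted CDF and the step at truth
    heaviside = (np.arange(len(cdf)) >= true_value).astype(np.float64)
    return np.mean((cdf - heaviside) ** 2)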
def extract_rois(expid):
    metadata_path = MODEL_PATH + "%s.pkl" % config.model.__name__
    assert os.path.exists(metadata_path)
    prediction_path = MODEL_PREDICTIONS_PATH + "%s.pkl" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print " %s" % metadata_path
    print "To generate"
    print " %s" % prediction_path

    print "Build model"
    interface_layers = config.model.build_model(image_size=config.patch_shape)
    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    for old_key, new_key in config.replace_input_tags.items():
        input_layers[new_key] = input_layers.pop(old_key)

    # merge all output layers into a fictional dummy layer which is not actually used
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )

    # get all the trainable parameters from the model
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    # Count all the parameters we are actually optimizing, and visualize what the model looks like.
    print string.ljust("  layer output shapes:", 26),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"

    def comma_seperator(v):
        return '{:,.0f}'.format(v)

    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 22)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(comma_seperator(num_param), 10)
        num_size = string.ljust(comma_seperator(np.prod(layer.output_shape[1:])), 10)
        print "    %s %s %s %s" % (name, num_param, num_size, layer.output_shape)

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])
    print "  number of parameters: %d" % num_params

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    idx = T.lscalar('idx')
    givens = dict()
    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx * config.batch_size:(idx + 1) * config.batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    print "Compiling..."
    iter_test = theano.function([idx],
                                network_outputs + theano_printer.get_the_stuff_to_print(),
                                givens=givens,
                                on_unused_input="ignore",
                                # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                )

    print "Preparing dataloaders"
    config.data_loader.prepare()

    print "Load model parameters"
    metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, metadata['param_values'])

    start_time, prev_time = None, None

    import multiprocessing as mp
    jobs = []

    for set in [VALIDATION, TRAIN, TEST]:
        set_indices = config.data_loader.indices[set]
        for _i, sample_id in enumerate(set_indices):
            if start_time is None:
                start_time = time.time()
                prev_time = start_time
            print "sample_id", sample_id, _i + 1, "/", len(set_indices), "in", set

            filenametag = input_layers.keys()[0].split(":")[0] + ":patient_id"
            data = config.data_loader.load_sample(
                sample_id,
                input_layers.keys() + config.extra_tags + [filenametag],
                {})
            patient_id = data["input"][filenametag]
            print patient_id

            seg_shape = output_layers["predicted_segmentation"].output_shape[1:]
            patch_gen = patch_generator(data, seg_shape,
                                        input_layers.keys()[0].split(":")[0] + ":")

            t0 = time.time()
            preds = []
            patches = []
            for patch_idx, patch in enumerate(patch_gen):
                for key in xs_shared:
                    xs_shared[key].set_value(patch[key][None, :])
                print " patch_generator", time.time() - t0
                t0 = time.time()
                th_result = iter_test(0)
                print " iter_test", time.time() - t0
                predictions = th_result[:len(network_outputs)]
                preds.append(predictions[0][0])
                patches.append(patch[xs_shared.keys()[0]])
                t0 = time.time()

            pred = glue_patches(preds)

            if not config.plot and config.multiprocess:
                jobs = [job for job in jobs if job.is_alive()]  # prune finished extraction jobs
                if len(jobs) >= 3:
                    # print "waiting", len(jobs)
                    jobs[0].join()
                    del jobs[0]
                jobs.append(mp.Process(target=extract_nodules,
                                       args=((pred, patient_id, expid),)))
                jobs[-1].daemon = True
                jobs[-1].start()
            else:
                rois = extract_nodules((pred, patient_id, expid))
                print "patient", patient_id, len(rois), "nodules"

            now = time.time()
            time_since_start = now - start_time
            time_since_prev = now - prev_time
            prev_time = now
            print "  %s since start (+%.2f s)" % (utils.hms(time_since_start), time_since_prev)

            if config.plot:
                plot_segmentation_and_nodules(patches, rois, pred, patient_id)

    for job in jobs:
        job.join()
    return
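# glue_patches() is not included in this fragment. A plausible minimal sketch,
# assuming the generator emits non-overlapping patches in scan order along one
# axis (the real implementation may stitch in three dimensions):
import numpy as np

def glue_patches_sketch(preds, axis=0):
    return np.concatenate([np.asarray(p) for p in preds], axis=axis)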
def train_model(expid):
    """
    This function trains the model, and will use the name expid to store and
    report the results.
    :param expid: the name
    :return:
    """
    metadata_path = MODEL_PATH + "%s.pkl" % expid

    # fast_run is very slow, but might be better for debugging.
    # Make sure you don't leave it on accidentally!
    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Build model"
    # Get the input and output layers of our model
    interface_layers = config.build_model()
    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]

    # merge all output layers into a fictional dummy layer which is not actually used
    top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values())

    # get all the trainable parameters from the model
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    # do not train beyond the layers in cutoff_gradients:
    # remove all their parameters from the optimization process
    if "cutoff_gradients" in interface_layers:
        submodel_params = [
            param
            for value in interface_layers["cutoff_gradients"]
            for param in lasagne.layers.get_all_params(value)
        ]
        all_params = [p for p in all_params if p not in submodel_params]

    # some parameters might already be pretrained! Load their values from
    # the requested configuration name.
    if "pretrained" in interface_layers:
        for config_name, layers_dict in interface_layers["pretrained"].iteritems():
            pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name
            pretrained_resume_metadata = np.load(pretrained_metadata_path)
            pretrained_top_layer = lasagne.layers.MergeLayer(
                incomings=layers_dict.values())
            lasagne.layers.set_all_param_values(
                pretrained_top_layer,
                pretrained_resume_metadata['param_values'])

    # Count all the parameters we are actually optimizing,
    # and visualize what the model looks like.
    print string.ljust("  layer output shapes:", 26),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"

    def comma_seperator(v):
        return '{:,.0f}'.format(v)

    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 22)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(comma_seperator(num_param), 10)
        num_size = string.ljust(comma_seperator(np.prod(layer.output_shape[1:])), 10)
        print "    %s %s %s %s" % (name, num_param, num_size, layer.output_shape)

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])
    print "  number of parameters:", comma_seperator(num_params)

    # Build all the objectives requested by the configuration
    objectives = config.build_objectives(interface_layers)

    train_losses_theano = {
        key: ob.get_loss()
        for key, ob in objectives["train"].iteritems()
    }
    validate_losses_theano = {
        key: ob.get_loss(deterministic=True)
        for key, ob in objectives["validate"].iteritems()
    }

    # Create the Theano variables necessary to interface with the model
    # the input:
    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }
    # the output:
    ys_shared = {
        key: lasagne.utils.shared_empty(dim=target_var.ndim, dtype=target_var.dtype)
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    # Set up the learning rate schedule
    learning_rate_schedule = config.learning_rate_schedule
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))

    # We only work on one batch at a time within our chunk.
    # Set up the Theano code which does this.
    idx = T.lscalar('idx')  # the index of the batch we are currently at in our chunk of data

    givens = dict()
    for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                   objectives["validate"].iteritems()):
        for (key, target_var) in ob.target_vars.iteritems():
            givens[target_var] = ys_shared[key][idx * config.batch_size:(idx + 1) * config.batch_size]
    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx * config.batch_size:(idx + 1) * config.batch_size]

    # Sum over the losses of the objective we optimize. We will optimize this sum
    # (either minimize or maximize). The sum makes the learning rate independent
    # of batch size!
    if hasattr(config, "dont_sum_losses") and config.dont_sum_losses:
        train_loss_theano = T.mean(train_losses_theano["objective"])
    else:
        train_loss_theano = T.sum(train_losses_theano["objective"]) * (
            -1 if objectives["train"]["objective"].optimize == MAXIMIZE else 1)

    # build the update step for Theano
    updates = config.build_updates(train_loss_theano, all_params, learning_rate)

    if hasattr(config, "print_gradnorm") and config.print_gradnorm:
        all_grads = theano.grad(train_loss_theano, all_params,
                                disconnected_inputs='warn')
        grad_norm = T.sqrt(T.sum([(g**2).sum() for g in all_grads]) + 1e-9)
        grad_norm.name = "grad_norm"
        theano_printer.print_me_this("  grad norm", grad_norm)
        # train_losses_theano["grad_norm"] = grad_norm

    # Compile the Theano function of your model+objective
    print "Compiling..."
    iter_train = theano.function(
        [idx],
        train_losses_theano.values() + theano_printer.get_the_stuff_to_print(),
        givens=givens,
        on_unused_input="ignore",
        updates=updates,
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    if hasattr(config, "print_gradnorm") and config.print_gradnorm:
        del theano_printer._stuff_to_print[-1]

    # For validation, we also like to have something which returns the output
    # of our model without the objective
    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]
    iter_predict = theano.function([idx],
                                   network_outputs + theano_printer.get_the_stuff_to_print(),
                                   givens=givens,
                                   on_unused_input="ignore")

    # The data loader will need to know which kinds of data it actually needs
    # to load; collect all the necessary tags for the model.
    required_input = {
        key: l_in.output_shape
        for (key, l_in) in input_layers.iteritems()
    }
    required_output = {
        key: None  # size is not needed
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    # The data loaders need to prepare before they can start.
    # This is usually where the data is loaded from disk into memory.
    print "Preparing dataloaders"
    config.training_data.prepare()
    for validation_data in config.validation_data.values():
        validation_data.prepare()

    print "Will train for %s epochs" % config.training_data.epochs

    # If this is the second time we run this configuration, we might need to
    # load the results of the previous optimization. Check if this is the case,
    # and load the parameters and state. If not, start from zero.
    if config.restart_from_save and os.path.isfile(metadata_path):
        print "Load model parameters for resuming"
        resume_metadata = np.load(metadata_path)
        lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
        start_chunk_idx = resume_metadata['chunks_since_start'] + 1

        # set lr to the correct value
        current_lr = np.float32(utils.current_learning_rate(learning_rate_schedule,
                                                            start_chunk_idx))
        print "  setting learning rate to %.7f" % current_lr
        learning_rate.set_value(current_lr)
        losses = resume_metadata['losses']
        config.training_data.skip_first_chunks(start_chunk_idx)
    else:
        start_chunk_idx = 0
        losses = dict()
        losses[TRAINING] = dict()
        losses[VALIDATION] = dict()
        for loss_name in train_losses_theano.keys():
            losses[TRAINING][loss_name] = list()
        for dataset_name in config.validation_data.keys():
            losses[VALIDATION][dataset_name] = dict()
            for loss_name in validate_losses_theano.keys():
                losses[VALIDATION][dataset_name][loss_name] = list()

    # Make a data generator which returns preprocessed chunks of data which are
    # fed to the model. Note that this is a generator object! It is a special
    # kind of iterator.
    chunk_size = config.batches_per_chunk * config.batch_size

    # Weight normalization
    if hasattr(config, "init_weight_norm") and not config.restart_from_save:
        theano_printer._stuff_to_print = []
        from theano_utils.weight_norm import train_weight_norm
        train_weight_norm(config, output_layers, all_layers, idx, givens,
                          xs_shared, chunk_size, required_input, required_output)

    training_data_generator = buffering.buffered_gen_threaded(
        config.training_data.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output=required_output,
        ))

    # Estimate the number of chunks we will train for.
    chunks_train_idcs = itertools.count(start_chunk_idx)
    if config.training_data.epochs:
        num_chunks_train = int(1.0 * config.training_data.epochs *
                               config.training_data.number_of_samples /
                               (config.batch_size * config.batches_per_chunk))
    else:
        num_chunks_train = None

    # Start the timer objects
    start_time, prev_time = None, None
    print "Loading first chunks"
    data_load_time = Timer()
    gpu_time = Timer()

    #========================#
    # This is the train loop #
    #========================#
    data_load_time.start()
    for e, train_data in izip(chunks_train_idcs, training_data_generator):
        data_load_time.stop()
        if start_time is None:
            start_time = time.time()
            prev_time = start_time
        print
        if num_chunks_train:
            print "Chunk %d/%d" % (e + 1, num_chunks_train)
        else:
            print "Chunk %d" % (e + 1)
        print "=============="
        print "  %s" % config.__name__

        # Estimate the current epoch we are at
        epoch = (1.0 * config.batch_size * config.batches_per_chunk * (e + 1)
                 / config.training_data.number_of_samples)
        if epoch >= 0.1:
            print "  Epoch %.1f/%s" % (epoch, str(config.training_data.epochs))
        else:
            print "  Epoch %.0e/%s" % (epoch, str(config.training_data.epochs))

        # for debugging the data loader, it might be useful to dump everything
        # it loaded and analyze it
        if config.dump_network_loaded_data:
            pickle.dump(train_data, open("data_loader_dump_train_%d.pkl" % e, "wb"))

        # Update the learning rate given the new epoch number
        for key, rate in learning_rate_schedule.iteritems():
            if epoch >= key:
                lr = np.float32(rate)
                learning_rate.set_value(lr)
        print "  learning rate %.0e" % lr

        # Move this data from the data loader onto the Theano variables
        for key in xs_shared:
            xs_shared[key].set_value(train_data["input"][key])
        for key in ys_shared:
            if key not in train_data["output"]:
                raise Exception("You forgot to add key %s to OUTPUT_DATA_SIZE_TYPE in your data loader" % key)
            ys_shared[key].set_value(train_data["output"][key])

        # loop over all the batches in one chunk, and keep the losses
        chunk_losses = np.zeros((len(train_losses_theano), 0))
        for b in xrange(config.batches_per_chunk):
            gpu_time.start()
            th_result = iter_train(b)
            gpu_time.stop()
            resulting_losses = np.stack(th_result[:len(train_losses_theano)], axis=0)
            # the remaining outputs are not needed here; they only exist to
            # make Theano call the print function
            chunk_losses = np.concatenate((chunk_losses, resulting_losses), axis=1)

        # check if we found NaN's. When there are NaN's we might as well exit.
        utils.detect_nans(chunk_losses, xs_shared, ys_shared, all_params)

        # Average our losses, and print them.
        mean_train_loss = np.mean(chunk_losses, axis=1)
        for loss_name, loss in zip(train_losses_theano.keys(), mean_train_loss):
            losses[TRAINING][loss_name].append(loss)
            print string.rjust(loss_name + ":", 15), "%.6f" % loss

        # Now, we will do validation. We do this about every
        # config.epochs_per_validation epochs.
        # We also always validate at the end of every training!
        validate_every = max(int((config.epochs_per_validation *
                                  config.training_data.number_of_samples) /
                                 (config.batch_size * config.batches_per_chunk)), 1)
        if ((e + 1) % validate_every) == 0 or (num_chunks_train and e + 1 >= num_chunks_train):
            print
            print "  Validating"

            # We might test on multiple datasets, such as the train set, validation set, ...
            for dataset_name, dataset_generator in config.validation_data.iteritems():
                # Start loading the validation data!
                validation_chunk_generator = dataset_generator.generate_batch(
                    chunk_size=chunk_size,
                    required_input=required_input,
                    required_output=required_output,
                )

                print "  %s (%d/%d samples)" % (dataset_name,
                                                dataset_generator.number_of_samples_in_iterator,
                                                dataset_generator.number_of_samples)
                print "  -----------------------"

                # If there are no validation samples, don't bother validating.
                if dataset_generator.number_of_samples == 0:
                    continue

                validation_predictions = None

                # Keep the labels of the validation data for later.
                output_keys_to_store = set()
                losses_to_store = dict()
                for key, ob in objectives["validate"].iteritems():
                    if ob.mean_over_samples:
                        losses_to_store[key] = None
                    else:
                        output_keys_to_store.add(ob.target_key)
                chunk_labels = {k: None for k in output_keys_to_store}
                store_network_output = (len(output_keys_to_store) > 0)

                # loop over all validation data chunks
                data_load_time.start()
                for validation_data in buffering.buffered_gen_threaded(validation_chunk_generator):
                    data_load_time.stop()
                    num_batches_chunk_eval = config.batches_per_chunk

                    # Set the validation data on the required Theano variables.
                    # Note: there is no use setting the output variables, as we
                    # do not feed the labels of the validation set to the model!
                    for key in xs_shared:
                        xs_shared[key].set_value(validation_data["input"][key])

                    # store all the output keys required for finding the validation error
                    for key in output_keys_to_store:
                        new_data = validation_data["output"][key][:validation_data["valid_samples"]]
                        if chunk_labels[key] is None:
                            chunk_labels[key] = new_data
                        else:
                            chunk_labels[key] = np.concatenate((chunk_labels[key], new_data), axis=0)

                    # loop over the batches of one chunk, and keep the predictions
                    chunk_predictions = None
                    for b in xrange(num_batches_chunk_eval):
                        gpu_time.start()
                        th_result = iter_predict(b)
                        gpu_time.stop()
                        resulting_predictions = np.stack(th_result[:len(network_outputs)], axis=0)
                        assert len(network_outputs) == 1, "Multiple outputs not implemented yet"
                        if chunk_predictions is None:
                            chunk_predictions = resulting_predictions
                        else:
                            chunk_predictions = np.concatenate(
                                (chunk_predictions, resulting_predictions), axis=1)

                    # Check for NaN's. Panic if there are NaN's during validation.
                    utils.detect_nans(chunk_predictions, xs_shared, ys_shared, all_params)

                    # add the predictions of this chunk to the global predictions (if needed)
                    if chunk_predictions is not None:
                        chunk_predictions = chunk_predictions[:validation_data[VALID_SAMPLES]]
                        if store_network_output:
                            if validation_predictions is None:
                                validation_predictions = chunk_predictions
                            else:
                                validation_predictions = np.concatenate(
                                    (validation_predictions, chunk_predictions), axis=1)

                    # if you can calculate the losses per chunk and take the
                    # mean afterwards, do that
                    for key, ob in objectives["validate"].iteritems():
                        if ob.mean_over_samples:
                            new_losses = []
                            for i in xrange(validation_data[VALID_SAMPLES]):
                                loss = ob.get_loss_from_lists(
                                    chunk_predictions[0, i:i + 1],
                                    validation_data["output"][ob.target_key][i:i + 1])
                                new_losses.append(loss)
                            new_losses = np.array(new_losses)
                            if losses_to_store[key] is None:
                                losses_to_store[key] = new_losses
                            else:
                                losses_to_store[key] = np.concatenate(
                                    (losses_to_store[key], new_losses), axis=0)
                    data_load_time.start()
                data_load_time.stop()

                # Compare the predictions with the actual labels and print them.
                for key, ob in objectives["validate"].iteritems():
                    if ob.mean_over_samples:
                        loss = np.mean(losses_to_store[key])
                    else:
                        loss = ob.get_loss_from_lists(validation_predictions[0, :],
                                                      chunk_labels[ob.target_key])
                    losses[VALIDATION][dataset_name][key].append(loss)
                    print string.rjust(key + ":", 17), "%.6f" % loss
                print

        # Good, we did one chunk. Let us check how much time this took us.
        # Print out some stats.
        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        # This is the most useful stat of all! Keep this number low,
        # and your total optimization time will be low too.
        print "  on average %dms per training sample" % (
            1000. * time_since_start / ((e + 1 - start_chunk_idx) *
                                        config.batch_size * config.batches_per_chunk))
        print "  %s since start (+%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  %s waiting on gpu vs %s waiting for data" % (gpu_time, data_load_time)
        try:
            if num_chunks_train:  # only if we ever stop running
                est_time_left = time_since_start * (
                    float(num_chunks_train - (e + 1 - start_chunk_idx)) /
                    float(e + 1 - start_chunk_idx))
                eta = datetime.datetime.now() + datetime.timedelta(seconds=est_time_left)
                eta_str = eta.strftime("%c")
                print "  estimated %s to go" % utils.hms(est_time_left)
                print "  (ETA: %s)" % eta_str
            if hasattr(config, "print_mean_chunks"):
                avg_train = losses[TRAINING]["objective"]
                n = min(len(avg_train), config.print_mean_chunks)
                avg_train = avg_train[-n:]
                print "  mean loss last %i chunks: %.3f" % (n, np.mean(avg_train))
        except OverflowError:
            print "  This will take really long, like REALLY long."

        if hasattr(config, "print_score_every_chunk") and config.print_score_every_chunk \
                and len(losses[VALIDATION]["training set"]["objective"]) > 0:
            print "  train: best %.3f latest %.3f, valid: best %.3f latest %.3f" % (
                np.min(losses[VALIDATION]["training set"]["objective"]),
                losses[VALIDATION]["training set"]["objective"][-1],
                np.min(losses[VALIDATION]["validation set"]["objective"]),
                losses[VALIDATION]["validation set"]["objective"][-1])

        # Save the data every config.save_every_chunks chunks, or at the end of
        # the training. We should make this config.save_every_epochs epochs at
        # some point, for consistency.
        if ((e + 1) % config.save_every_chunks) == 0 or (num_chunks_train and e + 1 >= num_chunks_train):
            print
            print "Saving metadata, parameters"
            with open(metadata_path, 'w') as f:
                pickle.dump({
                    'metadata_path': metadata_path,
                    'configuration_file': config.__name__,
                    'git_revision_hash': utils.get_git_revision_hash(),
                    'experiment_id': expid,
                    'chunks_since_start': e,
                    'losses': losses,
                    'time_since_start': time_since_start,
                    'param_values': lasagne.layers.get_all_param_values(top_layer)
                }, f, pickle.HIGHEST_PROTOCOL)
            print "  saved to %s" % metadata_path
            print

        # Reset the timers for the next round. This needs to happen here,
        # because at the end of the big for loop we already want to get a chunk
        # immediately for the next iteration. The iterator is an argument of
        # the for loop.
        gpu_time.reset()
        data_load_time.reset()
        data_load_time.start()
    return
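# The learning_rate_schedule used above is a dict mapping an epoch (or chunk)
# threshold to a rate. A plausible sketch of what utils.current_learning_rate
# is assumed to do -- return the rate of the largest threshold already reached
# (assuming the schedule contains key 0):
import numpy as np

def current_learning_rate_sketch(schedule, epoch):
    return np.float32(schedule[max(k for k in schedule if epoch >= k)])

schedule = {0: 1e-3, 10: 1e-4, 50: 1e-5}  # invented values
print(current_learning_rate_sketch(schedule, 12))  # 1e-04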
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print " %s" % metadata_path
    print "To generate"
    print " %s" % prediction_path
    print " %s" % submission_path

    print "Build model"
    interface_layers = config().build_model()
    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )

    all_layers = lasagne.layers.get_all_layers(top_layer)
    num_params = lasagne.layers.count_params(top_layer)
    print "  number of parameters: %d" % num_params
    print string.ljust("  layer output shapes:", 36),
    print string.ljust("#params:", 10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print "    %s %s %s" % (name, num_param, layer.output_shape)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    idx = T.lscalar('idx')
    givens = dict()
    for key in input_layers.keys():
        if key == "sunny":
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().sunny_batch_size:(idx + 1) * config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().batch_size:(idx + 1) * config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    iter_test = theano.function([idx],
                                network_outputs + theano_printer.get_the_stuff_to_print(),
                                givens=givens,
                                on_unused_input="ignore",
                                # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                )

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])

    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))
    chunks_train_idcs = range(1, num_chunks + 1)

    data_loader.filter_patient_folders()

    create_test_gen = partial(config().create_test_gen,
                              required_input_keys=xs_shared.keys(),
                              required_output_keys=["patients", "classification_correction_function"],
                              )

    print "Generate predictions with this model"
    start_time = time.time()
    prev_time = start_time

    predictions = [{
        "patient": i + 1,
        "systole": np.zeros((0, 600)),
        "diastole": np.zeros((0, 600))
    } for i in xrange(NUM_PATIENTS)]

    for e, test_data in izip(itertools.count(start=1),
                             buffering.buffered_gen_threaded(create_test_gen())):
        print "  load testing data onto GPU"
        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])
        patient_ids = test_data["output"]["patients"]
        classification_correction = test_data["output"]["classification_correction_function"]
        print "  patients:", " ".join(map(str, patient_ids))
        print "  chunk %d/%d" % (e, num_chunks)

        for b in xrange(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {output_layers.keys()[i]: network_outputs[i]
                                    for i in xrange(len(output_layers))}
            kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict)
            kaggle_systoles = kaggle_systoles.astype('float64')
            kaggle_diastoles = kaggle_diastoles.astype('float64')
            for idx, patient_id in enumerate(
                    patient_ids[b * config().batch_size:(b + 1) * config().batch_size]):
                if patient_id != 0:
                    index = patient_id - 1
                    patient_data = predictions[index]
                    assert patient_id == patient_data["patient"]

                    kaggle_systole = kaggle_systoles[idx:idx + 1, :]
                    kaggle_diastole = kaggle_diastoles[idx:idx + 1, :]
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    correction = classification_correction[b * config().batch_size + idx]
                    kaggle_systole = correction(kaggle_systole)
                    kaggle_diastole = correction(kaggle_diastole)
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    patient_data["systole"] = np.concatenate((patient_data["systole"], kaggle_systole), axis=0)
                    patient_data["diastole"] = np.concatenate((patient_data["diastole"], kaggle_diastole), axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) /
                                            float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    already_printed = False
    for prediction in predictions:
        if prediction["systole"].size > 0 and prediction["diastole"].size > 0:
            average_method = getattr(config(), 'tta_average_method',
                                     partial(np.mean, axis=0))
            prediction["systole_average"] = average_method(prediction["systole"])
            prediction["diastole_average"] = average_method(prediction["diastole"])
            try:
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])
            except:
                if not already_printed:
                    print "WARNING: These distributions are not distributions"
                    already_printed = True
                prediction["systole_average"] = make_monotone_distribution(prediction["systole_average"])
                prediction["diastole_average"] = make_monotone_distribution(prediction["diastole_average"])
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])

    print "Calculating training and validation set scores for reference"
    validation_dict = {}
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                  (train_patients_indices, "train")]:
        errors = []
        for patient in patient_ids:
            prediction = predictions[patient - 1]
            if "systole_average" in prediction:
                assert patient == regular_labels[patient - 1, 0]
                error = CRSP(prediction["systole_average"], regular_labels[patient - 1, 1])
                errors.append(error)
                error = CRSP(prediction["diastole_average"], regular_labels[patient - 1, 2])
                errors.append(error)
        if len(errors) > 0:
            errors = np.array(errors)
            estimated_CRSP = np.mean(errors)
            print "  %s kaggle loss: %f" % (string.rjust(set_name, 12), estimated_CRSP)
            validation_dict[set_name] = estimated_CRSP
        else:
            print "  %s kaggle loss: not calculated" % (string.rjust(set_name, 12))

    print "dumping prediction file to %s" % prediction_path
    with open(prediction_path, 'w') as f:
        pickle.dump({
            'metadata_path': metadata_path,
            'prediction_path': prediction_path,
            'submission_path': submission_path,
            'configuration_file': config().__name__,
            'git_revision_hash': utils.get_git_revision_hash(),
            'experiment_id': expid,
            'time_since_start': time_since_start,
            'param_values': lasagne.layers.get_all_param_values(top_layer),
            'predictions': predictions,
            'validation_errors': validation_dict,
        }, f, pickle.HIGHEST_PROTOCOL)
    print "prediction file dumped"

    print "dumping submission file to %s" % submission_path
    with open(submission_path, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(['Id'] + ['P%d' % i for i in xrange(600)])
        for prediction in predictions:
            # the submission only has patients 501 to 700
            if prediction["patient"] in data_loader.test_patients_indices:
                if "diastole_average" not in prediction or "systole_average" not in prediction:
                    raise Exception("Not all test-set patients were predicted")
                csvwriter.writerow(["%d_Diastole" % prediction["patient"]] +
                                   ["%.18f" % p for p in prediction["diastole_average"].flatten()])
                csvwriter.writerow(["%d_Systole" % prediction["patient"]] +
                                   ["%.18f" % p for p in prediction["systole_average"].flatten()])
    print "submission file dumped"
    return
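# test_if_valid_distribution() is assumed to verify that each 600-bin vector
# is a valid CDF: values in [0, 1] and non-decreasing. A hedged sketch of such
# a check:
import numpy as np

def test_if_valid_distribution_sketch(cdf):
    if np.any(cdf < 0.0) or np.any(cdf > 1.0):
        raise ValueError("CDF values outside [0, 1]")
    if np.any(np.diff(cdf) < 0.0):
        raise ValueError("CDF is not monotonically non-decreasing")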
def analyze_model(expid, path_to_function, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    analysis_path = ANALYSIS_PATH + "%s/" % expid

    if not os.path.exists(analysis_path):
        os.mkdir(analysis_path)

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print " %s" % metadata_path
    print "To generate"
    print " %s" % analysis_path

    interface_layers = config.build_model()
    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values())
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    if "cutoff_gradients" in interface_layers:
        submodel_params = [
            param
            for value in interface_layers["cutoff_gradients"]
            for param in lasagne.layers.get_all_params(value)
        ]
        all_params = [p for p in all_params if p not in submodel_params]

    if "pretrained" in interface_layers:
        for config_name, layers_dict in interface_layers["pretrained"].iteritems():
            pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name.split('.')[1]
            pretrained_resume_metadata = np.load(pretrained_metadata_path)
            pretrained_top_layer = lasagne.layers.MergeLayer(
                incomings=layers_dict.values())
            lasagne.layers.set_all_param_values(
                pretrained_top_layer,
                pretrained_resume_metadata['param_values'])

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])

    print string.ljust("  layer output shapes:", 34),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"

    def comma_seperator(v):
        return '{:,.0f}'.format(v)

    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 30)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(comma_seperator(num_param), 10)
        num_size = string.ljust(comma_seperator(np.prod(layer.output_shape[1:])), 10)
        print "    %s %s %s %s" % (name, num_param, num_size, layer.output_shape)
    print "  number of parameters:", comma_seperator(num_params)

    objectives = config.build_objectives(interface_layers)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }
    ys_shared = {
        key: lasagne.utils.shared_empty(dim=target_var.ndim, dtype=target_var.dtype)
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    idx = T.lscalar('idx')
    givens = dict()
    for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                   objectives["validate"].iteritems()):
        for (key, target_var) in ob.target_vars.iteritems():
            givens[target_var] = ys_shared[key][idx * config.batch_size:(idx + 1) * config.batch_size]
    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx * config.batch_size:(idx + 1) * config.batch_size]

    print "Compiling..."
    outputs = [
        lasagne.layers.helper.get_output(interface, deterministic=True)
        for interface in interface_layers["outputs"].values()
    ]
    iter_validate = theano.function([idx],
                                    outputs + theano_printer.get_the_stuff_to_print(),
                                    givens=givens,
                                    on_unused_input="ignore")

    required_input = {
        key: l_in.output_shape
        for (key, l_in) in input_layers.iteritems()
    }
    required_output = {
        key: None  # size is not needed
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    print "Preparing dataloaders"
    config.training_data.prepare()
    for validation_data in config.validation_data.values():
        validation_data.prepare()

    chunk_size = config.batches_per_chunk * config.batch_size

    training_data_generator = buffering.buffered_gen_threaded(
        config.training_data.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output=required_output,
        ))

    print "Will train for %s epochs" % config.training_data.epochs

    if os.path.isfile(metadata_path):
        print "Load model parameters for resuming"
        resume_metadata = np.load(metadata_path)
        lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
    else:
        raise Exception("No previous parameters found!")

    start_time, prev_time = None, None
    print "Loading first chunks"
    data_load_time = Timer()
    gpu_time = Timer()

    data_load_time.start()
    for dataset_name, dataset_generator in config.validation_data.iteritems():
        data_load_time.stop()
        if start_time is None:
            start_time = time.time()
            prev_time = start_time

        validation_chunk_generator = dataset_generator.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output=required_output,
        )

        print "  %s (%d/%d samples)" % (dataset_name,
                                        dataset_generator.number_of_samples_in_iterator,
                                        dataset_generator.number_of_samples)
        print "  -----------------------"

        data_load_time.start()
        for validation_data in buffering.buffered_gen_threaded(validation_chunk_generator):
            data_load_time.stop()
            num_batches_chunk_eval = config.batches_per_chunk

            for key in xs_shared:
                xs_shared[key].set_value(validation_data["input"][key])
            for key in ys_shared:
                ys_shared[key].set_value(validation_data["output"][key])

            idx = 0
            for b in xrange(num_batches_chunk_eval):
                gpu_time.start()
                th_result = iter_validate(b)
                gpu_time.stop()

                for idx_ex in xrange(config.batch_size):
                    # Create all the kwargs to analyze for each test run
                    kwargs = {}
                    for key in xs_shared.keys():
                        kwargs[key] = validation_data["input"][key][idx + idx_ex]
                    for key in ys_shared.keys():
                        kwargs[key] = validation_data["output"][key][idx + idx_ex]
                    for index, key in enumerate(interface_layers["outputs"].keys()):
                        kwargs[key] = th_result[index][idx_ex]

                    id = validation_data[IDS][idx + idx_ex]
                    if id is not None:
                        # Load the required function dynamically
                        importable = path_to_importable_string(path_to_function)
                        analysis_module = importlib.import_module(importable)
                        analysis_module.analyze(id=id,
                                                analysis_path=analysis_path,
                                                **kwargs)
                idx += config.batch_size
            data_load_time.start()
        data_load_time.stop()

        print
        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        print "  %s since start (+%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  (%s waiting on gpu vs %s waiting for data)" % (gpu_time, data_load_time)

        gpu_time.reset()
        data_load_time.reset()
        data_load_time.start()
    return
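# buffering.buffered_gen_threaded() is used throughout these scripts to
# overlap data loading with GPU work. Its source is not included here; a
# minimal sketch of the idea -- a daemon thread fills a bounded queue while
# the consumer iterates -- under that assumption:
import threading
try:
    import Queue as queue  # Python 2
except ImportError:
    import queue           # Python 3

def buffered_gen_threaded_sketch(source_gen, buffer_size=2):
    buf = queue.Queue(maxsize=buffer_size)
    sentinel = object()

    def producer():
        for item in source_gen:
            buf.put(item)   # blocks while the buffer is full
        buf.put(sentinel)   # signal exhaustion

    thread = threading.Thread(target=producer)
    thread.daemon = True
    thread.start()
    for item in iter(buf.get, sentinel):
        yield item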
all_accuracy_train.append(acc_train)
auc_train = utils.auc(predictions, labels)
all_auc_train.append(auc_train)

print "  average training loss: %.5f" % loss_train
print "  average training accuracy: %.5f" % acc_train
print "  average auc: %.5f" % auc_train

now = time.time()
time_since_start = now - start_time
time_since_prev = now - prev_time
prev_time = now
# est_time_left = time_since_start * num_epochs
# eta = datetime.now() + timedelta(seconds=est_time_left)
# eta_str = eta.strftime("%c")
print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
# print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
print

if (epoch >= config.start_saving_at) and ((epoch % config.save_every) == 0):
    print "  saving parameters and metadata"
    with open(metadata_path + "-%d.pkl" % epoch, 'w') as f:
        pickle.dump({
            'config_name': config_name,
            'param_values': nn.layers.get_all_param_values(l_out),
            'losses_train': all_losses_train,
            'accuracy_train': all_accuracy_train,
            'auc_train': all_auc_train,
            'accuracy_eval_valid': all_accuracy_eval_valid,
            'accuracy_eval_train': all_accuracy_eval_train,
            'auc_eval_train': all_auc_eval_train,
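# utils.auc() is not shown in these fragments. A compact sketch of ROC AUC via
# the Mann-Whitney rank statistic (binary labels, no tie handling), under that
# assumption:
import numpy as np

def auc_sketch(scores, labels):
    order = np.argsort(scores)
    ranks = np.empty(len(scores))
    ranks[order] = np.arange(1, len(scores) + 1)
    n_pos = np.sum(labels == 1)
    n_neg = len(labels) - n_pos
    return (np.sum(ranks[labels == 1]) - n_pos * (n_pos + 1) / 2.0) / (n_pos * n_neg)

s = np.array([0.1, 0.4, 0.35, 0.8])
y = np.array([0, 0, 1, 1])
print(auc_sketch(s, y))  # 0.75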
tmp_losses_valid.append(l_valid)

# calculate validation loss across validation set
valid_loss = np.mean(tmp_losses_valid)
# TODO: taking the mean is not correct if chunks have different sizes!
print('Validation loss: ', valid_loss)
losses_eval_valid.append(valid_loss)

now = time.time()
time_since_start = now - start_time
time_since_prev = now - prev_time
prev_time = now
est_time_left = (time_since_start *
                 (config().max_nchunks - chunk_idx + 1.) /
                 (chunk_idx + 1. - start_chunk_idx))
eta = datetime.now() + timedelta(seconds=est_time_left)
eta_str = eta.strftime("%c")
print("  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev))
print("  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str))
print()

if ((chunk_idx + 1) % config().save_every) == 0:
    print()
    print('Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks))
    print('Saving metadata, parameters')

    with open(metadata_path, 'w') as f:
        pickle.dump({
            'configuration_file': config_name,
            'git_revision_hash': utils.get_git_revision_hash(),
            'experiment_id': expid,
            'chunks_since_start': chunk_idx,
            'losses_eval_train': losses_eval_train,
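# The TODO above is straightforward to address: weight each chunk's loss by
# the number of samples it contained, so a short final chunk does not bias
# the mean. A minimal sketch, assuming per-chunk sample counts are available
# (the function name is illustrative, not from the codebase):

import numpy as np

def weighted_mean_loss(chunk_losses, chunk_sizes):
    """Mean loss over samples rather than over chunks, so unequal chunk
    sizes do not bias the estimate."""
    losses = np.asarray(chunk_losses, dtype=np.float64)
    sizes = np.asarray(chunk_sizes, dtype=np.float64)
    return float(np.sum(losses * sizes) / np.sum(sizes))

# e.g. weighted_mean_loss([0.50, 0.40], [128, 32]) -> 0.48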
auc_train = utils.auc(predictions, labels)
all_auc_train.append(auc_train)

print "  average training loss: %.5f" % loss_train
print "  average training accuracy: %.5f" % acc_train
print "  average auc: %.5f" % auc_train

now = time.time()
time_since_start = now - start_time
time_since_prev = now - prev_time
prev_time = now
# est_time_left = time_since_start * num_epochs
# eta = datetime.now() + timedelta(seconds=est_time_left)
# eta_str = eta.strftime("%c")
print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
# print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
print

if (epoch >= config.start_saving_at) and ((epoch % config.save_every) == 0):
    print "  saving parameters and metadata"
    with open(metadata_path + "-%d.pkl" % epoch, 'w') as f:
        pickle.dump({
            'config_name': config_name,
            'param_values': nn.layers.get_all_param_values(l_out),
            'losses_train': all_losses_train,
            'accuracy_train': all_accuracy_train,
            'auc_train': all_auc_train,
            'accuracy_eval_valid': all_accuracy_eval_valid,
            'accuracy_eval_train': all_accuracy_eval_train,
            'auc_eval_train': all_auc_eval_train,
        cont_denom])
del outputs

now = time.time()
time_since_start = now - start_time
time_since_prev = now - prev_time
prev_time = now
est_time_left = time_since_start * \
    ((num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_ids[0]))
eta = datetime.datetime.now() + \
    datetime.timedelta(seconds=est_time_left)
eta_str = eta.strftime("%c")
print "  %s since start (%.2f s)" % (hms(time_since_start), time_since_prev)
print "  estimated %s to go (ETA: %s)\n" % (hms(est_time_left), eta_str)

# Save after every validate.
if (((e + 1) % save_every) == 0 or
        ((e + 1) % validate_every) == 0 or
        ((e + 1) == num_chunks_train)):
    print "\nSaving model ..."
    with open(dump_path, 'w') as f:
        pickle.dump({
            'configuration': model.config_name,
            'model_id': model_id,
            'chunks_since_start': e,
            'time_since_start': time_since_start,
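# The ETA arithmetic repeated throughout these fragments is plain linear
# extrapolation from the chunks finished so far. A small self-contained
# sketch of the same formula (the function name is mine, not the repo's):

import datetime

def estimate_eta(time_since_start, done, total, first_idx=0):
    """Linear extrapolation: seconds spent so far, scaled by the ratio of
    remaining chunks to completed chunks. Counting completed chunks from
    first_idx keeps resumed runs from skewing the estimate."""
    est_time_left = time_since_start * (total - done) / float(done - first_idx)
    eta = datetime.datetime.now() + datetime.timedelta(seconds=est_time_left)
    return est_time_left, eta.strftime("%c")

# e.g. after 10 of 100 chunks in 120 s: estimate_eta(120., 10, 100) -> ~1080 s to go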
def train_model(expid):
    metadata_path = MODEL_PATH + "%s.pkl" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    data_loader.filter_patient_folders()

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values())
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    if "cutoff_gradients" in interface_layers:
        submodel_params = [param
                           for value in interface_layers["cutoff_gradients"]
                           for param in lasagne.layers.get_all_params(value)]
        all_params = [p for p in all_params if p not in submodel_params]

    if "pretrained" in interface_layers:
        for config_name, layers_dict in interface_layers["pretrained"].iteritems():
            pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name.split('.')[1]
            pretrained_resume_metadata = np.load(pretrained_metadata_path)
            pretrained_top_layer = lasagne.layers.MergeLayer(
                incomings=layers_dict.values())
            lasagne.layers.set_all_param_values(
                pretrained_top_layer, pretrained_resume_metadata['param_values'])

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])

    print string.ljust("  layer output shapes:", 36),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(int(num_param).__str__(), 10)
        num_size = string.ljust(np.prod(layer.output_shape[1:]).__str__(), 10)
        print "    %s %s %s %s" % (name, num_param, num_size, layer.output_shape)
    print "  number of parameters: %d" % num_params

    obj = config().build_objective(interface_layers)

    train_loss_theano = obj.get_loss()
    kaggle_loss_theano = obj.get_kaggle_loss()
    segmentation_loss_theano = obj.get_segmentation_loss()

    validation_other_losses = collections.OrderedDict()
    validation_train_loss = obj.get_loss(average=False, deterministic=True,
                                         validation=True,
                                         other_losses=validation_other_losses)
    validation_kaggle_loss = obj.get_kaggle_loss(average=False,
                                                 deterministic=True,
                                                 validation=True)
    validation_segmentation_loss = obj.get_segmentation_loss(average=False,
                                                             deterministic=True,
                                                             validation=True)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    # Contains the target_vars of the objective, not the output layers'
    # desired values! There can be more output layers than are strictly
    # required for the objective, e.g. for debugging.
    ys_shared = {
        key: lasagne.utils.shared_empty(dim=target_var.ndim, dtype='float32')
        for (key, target_var) in obj.target_vars.iteritems()
    }

    learning_rate_schedule = config().learning_rate_schedule
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))

    idx = T.lscalar('idx')

    givens = dict()
    for key in obj.target_vars.keys():
        if key == "segmentation":
            givens[obj.target_vars[key]] = \
                ys_shared[key][idx * config().sunny_batch_size:
                               (idx + 1) * config().sunny_batch_size]
        else:
            givens[obj.target_vars[key]] = \
                ys_shared[key][idx * config().batch_size:
                               (idx + 1) * config().batch_size]

    for key in input_layers.keys():
        if key == "sunny":
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().sunny_batch_size:
                               (idx + 1) * config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().batch_size:
                               (idx + 1) * config().batch_size]

    updates = config().build_updates(train_loss_theano, all_params, learning_rate)

    # grad_norm = T.sqrt(T.sum([(g**2).sum() for g in theano.grad(train_loss_theano, all_params)]))
    # theano_printer.print_me_this("Grad norm", grad_norm)

    iter_train = theano.function(
        [idx],
        [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano]
        + theano_printer.get_the_stuff_to_print(),
        givens=givens, on_unused_input="ignore", updates=updates,
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    iter_validate = theano.function(
        [idx],
        [validation_train_loss, validation_kaggle_loss, validation_segmentation_loss]
        + [v for _, v in validation_other_losses.items()]
        + theano_printer.get_the_stuff_to_print(),
        givens=givens, on_unused_input="ignore")

    num_chunks_train = int(config().num_epochs_train * NUM_TRAIN_PATIENTS /
                           (config().batch_size * config().batches_per_chunk))
    print "Will train for %d chunks" % num_chunks_train

    if config().restart_from_save and os.path.isfile(metadata_path):
        print "Load model parameters for resuming"
        resume_metadata = np.load(metadata_path)
        lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
        start_chunk_idx = resume_metadata['chunks_since_start'] + 1
        chunks_train_idcs = range(start_chunk_idx, num_chunks_train)

        # Set the learning rate to the correct value for the resumed chunk.
        current_lr = np.float32(
            utils.current_learning_rate(learning_rate_schedule, start_chunk_idx))
        print "  setting learning rate to %.7f" % current_lr
        learning_rate.set_value(current_lr)
        losses_train = resume_metadata['losses_train']
        losses_eval_valid = resume_metadata['losses_eval_valid']
        losses_eval_train = resume_metadata['losses_eval_train']
        losses_eval_valid_kaggle = []  # resume_metadata['losses_eval_valid_kaggle']
        losses_eval_train_kaggle = []  # resume_metadata['losses_eval_train_kaggle']
    else:
        chunks_train_idcs = range(num_chunks_train)
        losses_train = []
        losses_eval_valid = []
        losses_eval_train = []
        losses_eval_valid_kaggle = []
        losses_eval_train_kaggle = []

    create_train_gen = partial(config().create_train_gen,
                               required_input_keys=xs_shared.keys(),
                               required_output_keys=ys_shared.keys())  # + ["patients"]
    create_eval_valid_gen = partial(config().create_eval_valid_gen,
                                    required_input_keys=xs_shared.keys(),
                                    required_output_keys=ys_shared.keys())  # + ["patients"]
    create_eval_train_gen = partial(config().create_eval_train_gen,
                                    required_input_keys=xs_shared.keys(),
                                    required_output_keys=ys_shared.keys())

    print "Train model"
    start_time = time.time()
    prev_time = start_time

    num_batches_chunk = config().batches_per_chunk

    for e, train_data in izip(chunks_train_idcs,
                              buffering.buffered_gen_threaded(create_train_gen())):
        print "Chunk %d/%d" % (e + 1, num_chunks_train)
        epoch = (1.0 * config().batch_size * config().batches_per_chunk * (e + 1)
                 / NUM_TRAIN_PATIENTS)
        print "  Epoch %.1f" % epoch

        for key, rate in learning_rate_schedule.iteritems():
            if epoch >= key:
                lr = np.float32(rate)
                learning_rate.set_value(lr)
                print "  learning rate %.7f" % lr

        if config().dump_network_loaded_data:
            pickle.dump(train_data, open("data_loader_dump_train_%d.pkl" % e, "wb"))

        for key in xs_shared:
            xs_shared[key].set_value(train_data["input"][key])
        for key in ys_shared:
            ys_shared[key].set_value(train_data["output"][key])
        # print "train:", sorted(train_data["output"]["patients"])

        losses = []
        kaggle_losses = []
        segmentation_losses = []
        for b in xrange(num_batches_chunk):
            iter_result = iter_train(b)
            loss, kaggle_loss, segmentation_loss = tuple(iter_result[:3])
            utils.detect_nans(loss, xs_shared, ys_shared, all_params)

            losses.append(loss)
            kaggle_losses.append(kaggle_loss)
            segmentation_losses.append(segmentation_loss)

        mean_train_loss = np.mean(losses)
        print "  mean training loss:\t\t%.6f" % mean_train_loss
        losses_train.append(mean_train_loss)
        print "  mean kaggle loss:\t\t%.6f" % np.mean(kaggle_losses)
        print "  mean segment loss:\t\t%.6f" % np.mean(segmentation_losses)

        if ((e + 1) % config().validate_every) == 0:
            print
            print "Validating"
            if config().validate_train_set:
                subsets = ["validation", "train"]
                gens = [create_eval_valid_gen, create_eval_train_gen]
                losses_eval = [losses_eval_valid, losses_eval_train]
                losses_kaggle = [losses_eval_valid_kaggle, losses_eval_train_kaggle]
            else:
                subsets = ["validation"]
                gens = [create_eval_valid_gen]
                losses_eval = [losses_eval_valid]
                losses_kaggle = [losses_eval_valid_kaggle]

            for subset, create_gen, losses_validation, losses_kgl in zip(
                    subsets, gens, losses_eval, losses_kaggle):
                vld_losses = []
                vld_kaggle_losses = []
                vld_segmentation_losses = []
                vld_other_losses = {k: [] for k, _ in validation_other_losses.items()}

                print "  %s set (%d samples)" % (
                    subset, get_number_of_validation_samples(set=subset))

                for validation_data in buffering.buffered_gen_threaded(create_gen()):
                    num_batches_chunk_eval = config().batches_per_chunk

                    if config().dump_network_loaded_data:
                        pickle.dump(validation_data,
                                    open("data_loader_dump_valid_%d.pkl" % e, "wb"))

                    for key in xs_shared:
                        xs_shared[key].set_value(validation_data["input"][key])
                    for key in ys_shared:
                        ys_shared[key].set_value(validation_data["output"][key])
                    # print "validate:", validation_data["output"]["patients"]

                    for b in xrange(num_batches_chunk_eval):
                        losses = tuple(iter_validate(b)[:3 + len(validation_other_losses)])
                        loss, kaggle_loss, segmentation_loss = losses[:3]
                        other_losses = losses[3:]
                        vld_losses.extend(loss)
                        vld_kaggle_losses.extend(kaggle_loss)
                        vld_segmentation_losses.extend(segmentation_loss)
                        for k, other_loss in zip(validation_other_losses, other_losses):
                            vld_other_losses[k].extend(other_loss)

                vld_losses = np.array(vld_losses)
                vld_kaggle_losses = np.array(vld_kaggle_losses)
                vld_segmentation_losses = np.array(vld_segmentation_losses)
                for k in validation_other_losses:
                    vld_other_losses[k] = np.array(vld_other_losses[k])

                # Now select only the relevant section to average over.
                sunny_len = get_lenght_of_set(name="sunny", set=subset)
                regular_len = get_lenght_of_set(name="regular", set=subset)
                num_valid_samples = get_number_of_validation_samples(set=subset)

                loss_to_save = obj.compute_average(vld_losses[:num_valid_samples])
                print "  mean training loss:\t\t%.6f" % loss_to_save
                print "  mean kaggle loss:\t\t%.6f" % np.mean(vld_kaggle_losses[:regular_len])
                print "  mean segment loss:\t\t%.6f" % np.mean(vld_segmentation_losses[:sunny_len])
                for k, v in vld_other_losses.items():
                    print "  mean %s loss:\t\t%.6f" % (
                        k, obj.compute_average(v[:num_valid_samples], loss_name=k))
                print

                losses_validation.append(loss_to_save)

                kaggle_to_save = np.mean(vld_kaggle_losses[:regular_len])
                losses_kgl.append(kaggle_to_save)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks_train - (e + 1)) /
                                            float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

        if ((e + 1) % config().save_every) == 0:
            print
            print "Saving metadata, parameters"

            with open(metadata_path, 'w') as f:
                pickle.dump({
                    'metadata_path': metadata_path,
                    'configuration_file': config().__name__,
                    'git_revision_hash': utils.get_git_revision_hash(),
                    'experiment_id': expid,
                    'chunks_since_start': e,
                    'losses_train': losses_train,
                    'losses_eval_train': losses_eval_train,
                    'losses_eval_train_kaggle': losses_eval_train_kaggle,
                    'losses_eval_valid': losses_eval_valid,
                    'losses_eval_valid_kaggle': losses_eval_valid_kaggle,
                    'time_since_start': time_since_start,
                    'param_values': lasagne.layers.get_all_param_values(top_layer)
                }, f, pickle.HIGHEST_PROTOCOL)

            print "  saved to %s" % metadata_path
            print

        # Store all known outputs from the last batch:
        if config().take_a_dump:
            all_theano_variables = [train_loss_theano, kaggle_loss_theano,
                                    segmentation_loss_theano] + \
                theano_printer.get_the_stuff_to_print()
            for layer in all_layers[:-1]:
                all_theano_variables.append(lasagne.layers.helper.get_output(layer))

            iter_train = theano.function(
                [idx], all_theano_variables,
                givens=givens, on_unused_input="ignore", updates=updates)
            train_data["intermediates"] = iter_train(0)
            pickle.dump(train_data, open(metadata_path + "-dump", "wb"))

    return
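# On resume, utils.current_learning_rate has to recover which schedule entry
# applies at the resumed chunk index. A plausible minimal implementation,
# assuming the schedule is a dict of {start_index: rate} as used above (the
# repo's actual helper may differ):

def current_learning_rate(schedule, idx):
    """Step-wise schedule lookup: return the rate whose starting index is the
    largest one not exceeding idx."""
    current = schedule[0]
    for start in sorted(schedule.keys()):
        if idx >= start:
            current = schedule[start]
    return current

# e.g. current_learning_rate({0: 1e-3, 500: 1e-4}, 600) -> 1e-4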
def main():
    sym_y = T.imatrix('target_output')
    sym_mask = T.matrix('mask')
    sym_x = T.tensor3()
    TOL = 1e-5

    num_epochs = config.epochs
    batch_size = config.batch_size

    print("Building network ...")
    l_in, l_out = config.build_model()

    all_layers = nn.layers.get_all_layers(l_out)
    num_params = nn.layers.count_params(l_out)
    print("  number of parameters: %d" % num_params)
    print("  layer output shapes:")
    for layer in all_layers:
        name = layer.__class__.__name__
        print("    %s %s" % (name, nn.layers.get_output_shape(layer)))

    print("Creating cost function")
    # lasagne.layers.get_output produces a variable for the output of the net
    out_train = nn.layers.get_output(l_out, sym_x, deterministic=False)

    print("Creating eval function")
    out_eval = nn.layers.get_output(l_out, sym_x, deterministic=True)

    probs_flat = out_train.reshape((-1, num_classes))

    lambda_reg = config.lambda_reg

    # Zero out one half of each 3D weight tensor and train only the remaining
    # half, depending on which side is being trained.
    all_params = nn.layers.get_all_params(l_out)
    for i, p in enumerate(all_params):
        if p.ndim == 3:
            values = p.get_value()
            if side == 'right':
                values[..., int(values.shape[2] / 2.0 - 0.5):] = 0
                p.set_value(values)
                all_params[i] = p[..., :int(values.shape[2] / 2.0 - 0.5)]
            else:
                values[..., :int(values.shape[2] / 2.0 + 0.5)] = 0
                p.set_value(values)
                all_params[i] = p[..., int(values.shape[2] / 2.0 + 0.5):]

    params = [el for el in all_params if el.name == "W" or el.name == "gamma"]
    reg_term = sum(T.sum(p ** 2) for p in params)

    cost = T.nnet.categorical_crossentropy(T.clip(probs_flat, TOL, 1 - TOL),
                                           sym_y.flatten())
    cost = T.sum(cost * sym_mask.flatten()) / T.sum(sym_mask) + lambda_reg * reg_term

    # Retrieve all trainable parameters from the network
    all_params = [el for el in all_params
                  if el.name == "W" or el.name == "gamma" or el.name == "beta"]

    # Setting the weights
    if hasattr(config, 'set_weights'):
        nn.layers.set_all_param_values(l_out, config.set_weights())

    # Compute SGD updates for training
    print("Computing updates ...")
    if hasattr(config, 'learning_rate_schedule'):
        learning_rate_schedule = config.learning_rate_schedule  # Import learning rate schedule
    else:
        learning_rate_schedule = {0: config.learning_rate}
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))

    all_grads = T.grad(cost, all_params)

    cut_norm = config.cut_grad
    updates, norm_calc = nn.updates.total_norm_constraint(all_grads,
                                                          max_norm=cut_norm,
                                                          return_norm=True)

    if optimizer == "rmsprop":
        updates = nn.updates.rmsprop(updates, all_params, learning_rate)
    elif optimizer == "adadelta":
        updates = nn.updates.adadelta(updates, all_params, learning_rate)
    elif optimizer == "adagrad":
        updates = nn.updates.adagrad(updates, all_params, learning_rate)
    elif optimizer == "nag":
        momentum_schedule = config.momentum_schedule
        momentum = theano.shared(np.float32(momentum_schedule[0]))
        updates = nn.updates.nesterov_momentum(updates, all_params,
                                               learning_rate, momentum)
    else:
        sys.exit("please choose either <rmsprop/adagrad/adadelta/nag> in configfile")

    # Theano functions for training and computing cost
    print("config.batch_size %d" % batch_size)
    print("data.num_classes %d" % num_classes)
    if hasattr(config, 'build_model'):
        print("has build model")

    print("Compiling train ...")
    # Use this for training (see deterministic=False above)
    train = theano.function([sym_x, sym_y, sym_mask],
                            [cost, out_train, norm_calc], updates=updates)

    print("Compiling eval ...")
    # Use this for evaluation (deterministic=True and no updates)
    eval = theano.function([sym_x, sym_y, sym_mask], [cost, out_eval])

    # Start timers
    start_time = time.time()
    prev_time = start_time

    all_losses_train = []
    all_accuracy_train = []
    all_losses_eval_train = []
    all_losses_eval_valid = []
    all_losses_eval_test = []
    all_accuracy_eval_train = []
    all_accuracy_eval_valid = []
    all_accuracy_eval_test = []
    all_mean_norm = []

    import data
    X_train, X_valid, y_train, y_valid, mask_train, mask_valid, num_seq_train \
        = data.get_train()
    # Only train with pssm scores
    X_train, X_valid = X_train[..., 21:], X_valid[..., 21:]

    print("y shape")
    print(y_valid.shape)
    print("X shape")
    print(X_valid.shape)

    # Binarize the labels: class 5 becomes the positive class, all others 0.
    for i in range(y_train.shape[0]):
        for j in range(y_train.shape[1]):
            y_train[i][j] = 1 if y_train[i][j] == 5 else 0
    for i in range(y_valid.shape[0]):
        for j in range(y_valid.shape[1]):
            y_valid[i][j] = 1 if y_valid[i][j] == 5 else 0

    # Start training
    for epoch in range(num_epochs):
        if (epoch % 10) == 0:
            print("Epoch %d of %d" % (epoch + 1, num_epochs))

        if epoch in learning_rate_schedule:
            lr = np.float32(learning_rate_schedule[epoch])
            print("  setting learning rate to %.7f" % lr)
            learning_rate.set_value(lr)
        if optimizer == "nag":
            if epoch in momentum_schedule:
                mu = np.float32(momentum_schedule[epoch])
                print("  setting momentum to %.7f" % mu)
                momentum.set_value(mu)

        # Shuffle the training sequences
        seq_names = np.arange(0, num_seq_train)
        np.random.shuffle(seq_names)
        X_train = X_train[seq_names]
        y_train = y_train[seq_names]
        mask_train = mask_train[seq_names]

        num_batches = num_seq_train // batch_size
        losses = []
        preds = []
        norms = []
        for i in range(num_batches):
            idx = range(i * batch_size, (i + 1) * batch_size)
            x_batch = X_train[idx]
            y_batch = y_train[idx]
            mask_batch = mask_train[idx]
            loss, out, batch_norm = train(x_batch, y_batch, mask_batch)
            norms.append(batch_norm)
            preds.append(out)
            losses.append(loss)

        predictions = np.concatenate(preds, axis=0)
        loss_train = np.mean(losses)
        all_losses_train.append(loss_train)

        acc_train = utils.proteins_acc(predictions,
                                       y_train[0:num_batches * batch_size],
                                       mask_train[0:num_batches * batch_size])
        all_accuracy_train.append(acc_train)

        mean_norm = np.mean(norms)
        all_mean_norm.append(mean_norm)

        print("  average training loss: %.5f" % loss_train)
        print("  average training accuracy: %.5f" % acc_train)
        print("  average norm: %.5f" % mean_norm)

        sets = [  # ('train', X_train, y_train, mask_train,
                  #  all_losses_eval_train, all_accuracy_eval_train),
                ('valid', X_valid, y_valid, mask_valid,
                 all_losses_eval_valid, all_accuracy_eval_valid)]
        for subset, X, y, mask, all_losses, all_accuracy in sets:
            print("  validating: %s loss" % subset)
            preds = []
            losses = []  # reset so evaluation losses are not mixed with training ones
            num_batches = np.size(X, axis=0) // config.batch_size
            for i in range(num_batches):
                idx = range(i * batch_size, (i + 1) * batch_size)
                x_batch = X[idx]
                y_batch = y[idx]
                mask_batch = mask[idx]
                loss, out = eval(x_batch, y_batch, mask_batch)
                preds.append(out)
                losses.append(loss)

            predictions = np.concatenate(preds, axis=0)
            loss_eval = np.mean(losses)
            all_losses.append(loss_eval)
            acc_eval = utils.proteins_acc(predictions, y, mask)
            all_accuracy.append(acc_eval)

            print("  average evaluation loss (%s): %.5f" % (subset, loss_eval))
            print("  average evaluation accuracy (%s): %.5f" % (subset, acc_eval))

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_prev * (num_epochs - epoch)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print("  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev))
        print("  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str))
        print()

        if (epoch >= config.start_saving_at) and ((epoch % config.save_every) == 0):
            print("  saving parameters and metadata")
            with open(metadata_path + side + "-%d.pkl" % epoch, 'wb') as f:
                pickle.dump({
                    'config_name': config_name,
                    'param_values': nn.layers.get_all_param_values(l_out),
                    'losses_train': all_losses_train,
                    'accuracy_train': all_accuracy_train,
                    'losses_eval_train': all_losses_eval_train,
                    'losses_eval_valid': all_losses_eval_valid,
                    'losses_eval_test': all_losses_eval_test,
                    'accuracy_eval_valid': all_accuracy_eval_valid,
                    'accuracy_eval_train': all_accuracy_eval_train,
                    'accuracy_eval_test': all_accuracy_eval_test,
                    'mean_norm': all_mean_norm,
                    'time_since_start': time_since_start,
                    'i': i,
                }, f, pickle.HIGHEST_PROTOCOL)
            print("  stored in %s" % metadata_path)
            print()
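# The nested relabeling loops above can be replaced with a single vectorized
# step; a minimal NumPy sketch of the same class-5-vs-rest binarization
# (illustrative, not from the codebase):

import numpy as np

def binarize_labels(y, positive_class=5):
    """Map positive_class to 1 and every other label to 0, elementwise."""
    return (y == positive_class).astype(y.dtype)

# e.g. binarize_labels(np.array([[5, 2], [0, 5]])) -> [[1, 0], [0, 1]]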
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print("WARNING: not running in fast mode!")

    print("Using")
    print("  %s" % metadata_path)
    print("To generate")
    print("  %s" % prediction_path)
    print("  %s" % submission_path)

    print("Build model")
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(incomings=list(output_layers.values()))
    all_layers = lasagne.layers.get_all_layers(top_layer)
    num_params = lasagne.layers.count_params(top_layer)
    print("  number of parameters: %d" % num_params)

    print(string.ljust("  layer output shapes:", 36), end=' ')
    print(string.ljust("#params:", 10), end=' ')
    print("output shape:")
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print("    %s %s %s" % (name, num_param, layer.output_shape))

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.items()
    }
    idx = T.lscalar('idx')

    givens = dict()
    for key in list(input_layers.keys()):
        if key == "sunny":
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().sunny_batch_size:
                               (idx + 1) * config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().batch_size:
                               (idx + 1) * config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in list(output_layers.values())
    ]
    iter_test = theano.function(
        [idx],
        network_outputs + theano_printer.get_the_stuff_to_print(),
        givens=givens, on_unused_input="ignore",
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    print("Load model parameters for resuming")
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])

    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = list(range(1, num_chunks + 1))

    data_loader.filter_patient_folders()

    create_test_gen = partial(
        config().create_test_gen,
        required_input_keys=list(xs_shared.keys()),
        required_output_keys=["patients", "classification_correction_function"],
    )

    print("Generate predictions with this model")
    start_time = time.time()
    prev_time = start_time

    predictions = [{"patient": i + 1,
                    "systole": np.zeros((0, 600)),
                    "diastole": np.zeros((0, 600))}
                   for i in range(NUM_PATIENTS)]

    for e, test_data in zip(itertools.count(start=1),
                            buffering.buffered_gen_threaded(create_test_gen())):
        print("  load testing data onto GPU")
        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])
        patient_ids = test_data["output"]["patients"]
        classification_correction = \
            test_data["output"]["classification_correction_function"]
        print("  patients:", " ".join(map(str, patient_ids)))
        print("  chunk %d/%d" % (e, num_chunks))

        for b in range(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {
                list(output_layers.keys())[i]: network_outputs[i]
                for i in range(len(output_layers))
            }
            kaggle_systoles, kaggle_diastoles = \
                config().postprocess(network_outputs_dict)
            kaggle_systoles = kaggle_systoles.astype('float64')
            kaggle_diastoles = kaggle_diastoles.astype('float64')

            for idx, patient_id in enumerate(
                    patient_ids[b * config().batch_size:
                                (b + 1) * config().batch_size]):
                if patient_id != 0:
                    index = patient_id - 1
                    patient_data = predictions[index]
                    assert patient_id == patient_data["patient"]

                    kaggle_systole = kaggle_systoles[idx:idx + 1, :]
                    kaggle_diastole = kaggle_diastoles[idx:idx + 1, :]
                    assert np.isfinite(kaggle_systole).all()
                    assert np.isfinite(kaggle_diastole).all()

                    correction = classification_correction[b * config().batch_size + idx]
                    kaggle_systole = correction(kaggle_systole)
                    kaggle_diastole = correction(kaggle_diastole)
                    assert np.isfinite(kaggle_systole).all()
                    assert np.isfinite(kaggle_diastole).all()

                    patient_data["systole"] = np.concatenate(
                        (patient_data["systole"], kaggle_systole), axis=0)
                    patient_data["diastole"] = np.concatenate(
                        (patient_data["diastole"], kaggle_diastole), axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) /
                                            float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print("  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev))
        print("  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str))
        print()

    already_printed = False
    for prediction in predictions:
        if prediction["systole"].size > 0 and prediction["diastole"].size > 0:
            average_method = getattr(config(), 'tta_average_method',
                                     partial(np.mean, axis=0))
            prediction["systole_average"] = average_method(prediction["systole"])
            prediction["diastole_average"] = average_method(prediction["diastole"])
            try:
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])
            except Exception:
                if not already_printed:
                    print("WARNING: These distributions are not distributions")
                    already_printed = True
                prediction["systole_average"] = \
                    make_monotone_distribution(prediction["systole_average"])
                prediction["diastole_average"] = \
                    make_monotone_distribution(prediction["diastole_average"])
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])

    print("Calculating training and validation set scores for reference")
    validation_dict = {}
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                  (train_patients_indices, "train")]:
        errors = []
        for patient in patient_ids:
            prediction = predictions[patient - 1]
            if "systole_average" in prediction:
                assert patient == regular_labels[patient - 1, 0]
                error = CRSP(prediction["systole_average"],
                             regular_labels[patient - 1, 1])
                errors.append(error)
                error = CRSP(prediction["diastole_average"],
                             regular_labels[patient - 1, 2])
                errors.append(error)
        if len(errors) > 0:
            errors = np.array(errors)
            estimated_CRSP = np.mean(errors)
            print("  %s kaggle loss: %f" % (string.rjust(set_name, 12), estimated_CRSP))
            validation_dict[set_name] = estimated_CRSP
        else:
            print("  %s kaggle loss: not calculated" % string.rjust(set_name, 12))

    print("dumping prediction file to %s" % prediction_path)
    with open(prediction_path, 'w') as f:
        pickle.dump({
            'metadata_path': metadata_path,
            'prediction_path': prediction_path,
            'submission_path': submission_path,
            'configuration_file': config().__name__,
            'git_revision_hash': utils.get_git_revision_hash(),
            'experiment_id': expid,
            'time_since_start': time_since_start,
            'param_values': lasagne.layers.get_all_param_values(top_layer),
            'predictions': predictions,
            'validation_errors': validation_dict,
        }, f, pickle.HIGHEST_PROTOCOL)
    print("prediction file dumped")

    print("dumping submission file to %s" % submission_path)
    with open(submission_path, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(['Id'] + ['P%d' % i for i in range(600)])
        for prediction in predictions:
            # the submission only has patients 501 to 700
            if prediction["patient"] in data_loader.test_patients_indices:
                if ("diastole_average" not in prediction
                        or "systole_average" not in prediction):
                    raise Exception("Not all test-set patients were predicted")
                csvwriter.writerow(
                    ["%d_Diastole" % prediction["patient"]] +
                    ["%.18f" % p for p in prediction["diastole_average"].flatten()])
                csvwriter.writerow(
                    ["%d_Systole" % prediction["patient"]] +
                    ["%.18f" % p for p in prediction["systole_average"].flatten()])
    print("submission file dumped")
    return
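# The submission encodes each prediction as a 600-bin cumulative distribution,
# which is why the monotonicity fix-up above exists. A minimal sketch of what
# such a repair and validity check might look like (assumed semantics; the
# repo's make_monotone_distribution / test_if_valid_distribution may differ):

import numpy as np

def make_monotone_distribution(cdf):
    """Clip to [0, 1] and enforce non-decreasing values with a running max."""
    return np.maximum.accumulate(np.clip(cdf, 0.0, 1.0))

def test_if_valid_distribution(cdf):
    """A valid CDF over the 600 bins stays in [0, 1] and never decreases."""
    assert np.all(cdf >= 0.0) and np.all(cdf <= 1.0)
    assert np.all(np.diff(cdf) >= 0.0)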
    losses_eval_valid[obj_name] = valid_mean
    means.append(valid_mean)
    print(obj_name, valid_mean)
print('Sum of mean losses:', sum(means))

now = time.time()
time_since_start = now - start_time
time_since_prev = now - prev_time
prev_time = now
est_time_left = (time_since_start *
                 (config().max_nchunks - chunk_idx + 1.) /
                 (chunk_idx + 1. - start_chunk_idx))
eta = datetime.now() + timedelta(seconds=est_time_left)
eta_str = eta.strftime("%c")
print("  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev))
print("  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str))
print()

if ((chunk_idx + 1) % config().save_every) == 0:
    print()
    print('Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks))
    print('Saving metadata, parameters')

    with open(metadata_path, 'w') as f:
        pickle.dump({
            'configuration_file': config_name,
            'git_revision_hash': utils.get_git_revision_hash(),
            'experiment_id': expid,
def main():
    sym_y = T.imatrix('target_output')
    sym_mask = T.matrix('mask')
    sym_x = T.tensor3()
    TOL = 1e-5

    num_epochs = config.epochs
    batch_size = config.batch_size

    print("Building network ...")
    l_in, l_out = config.build_model()

    all_layers = nn.layers.get_all_layers(l_out)
    num_params = nn.layers.count_params(l_out)
    print("  number of parameters: %d" % num_params)
    print("  layer output shapes:")
    for layer in all_layers:
        name = string.ljust(layer.__class__.__name__, 32)
        print("    %s %s" % (name, nn.layers.get_output_shape(layer)))

    print("Creating cost function")
    # lasagne.layers.get_output produces a variable for the output of the net
    out_train = nn.layers.get_output(l_out, sym_x, mask=sym_mask,
                                     deterministic=False)
    out_eval = nn.layers.get_output(l_out, sym_x, mask=sym_mask,
                                    deterministic=True)

    probs_flat = out_train.reshape((-1, num_classes))

    lambda_reg = config.lambda_reg
    params = nn.layers.get_all_params(l_out, regularizable=True)
    reg_term = sum(T.sum(p**2) for p in params)

    cost = T.nnet.categorical_crossentropy(T.clip(probs_flat, TOL, 1 - TOL),
                                           sym_y.flatten())
    cost = T.sum(cost * sym_mask.flatten()) / T.sum(sym_mask) + lambda_reg * reg_term

    # Retrieve all parameters from the network
    all_params = nn.layers.get_all_params(l_out, trainable=True)

    # Setting the weights
    if hasattr(config, 'set_weights'):
        nn.layers.set_all_param_values(l_out, config.set_weights())

    # Compute SGD updates for training
    print("Computing updates ...")
    if hasattr(config, 'learning_rate_schedule'):
        learning_rate_schedule = config.learning_rate_schedule  # Import learning rate schedule
    else:
        learning_rate_schedule = {0: config.learning_rate}
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))

    all_grads = T.grad(cost, all_params)

    cut_norm = config.cut_grad
    updates, norm_calc = nn.updates.total_norm_constraint(all_grads,
                                                          max_norm=cut_norm,
                                                          return_norm=True)

    if optimizer == "rmsprop":
        updates = nn.updates.rmsprop(updates, all_params, learning_rate)
    elif optimizer == "adadelta":
        updates = nn.updates.adadelta(updates, all_params, learning_rate)
    elif optimizer == "adagrad":
        updates = nn.updates.adagrad(updates, all_params, learning_rate)
    elif optimizer == "nag":
        momentum_schedule = config.momentum_schedule
        momentum = theano.shared(np.float32(momentum_schedule[0]))
        updates = nn.updates.nesterov_momentum(updates, all_params,
                                               learning_rate, momentum)
    else:
        sys.exit("please choose either <rmsprop/adagrad/adadelta/nag> in configfile")

    # Theano functions for training and computing cost
    print "config.batch_size %d" % batch_size
    print "data.num_classes %d" % num_classes
    if hasattr(config, 'build_model'):
        print("has build model")
    print("Compiling functions ...")

    # Use this for training (see deterministic=False above)
    train = theano.function([sym_x, sym_y, sym_mask],
                            [cost, out_train, norm_calc], updates=updates)

    # Use this for evaluation (deterministic=True and no updates)
    eval = theano.function([sym_x, sym_y, sym_mask], [cost, out_eval])

    # Start timers
    start_time = time.time()
    prev_time = start_time

    all_losses_train = []
    all_accuracy_train = []
    all_losses_eval_train = []
    all_losses_eval_valid = []
    all_losses_eval_test = []
    all_accuracy_eval_train = []
    all_accuracy_eval_valid = []
    all_accuracy_eval_test = []
    all_mean_norm = []

    import data
    X_train = data.X_train
    X_valid = data.X_valid
    X_test = data.X_test
    y_train = data.labels_train
    y_valid = data.labels_valid
    y_test = data.labels_test
    mask_train = data.mask_train
    mask_valid = data.mask_valid
    mask_test = data.mask_test

    print("y shape")
    print(y_valid.shape)
    print("X shape")
    print(X_valid.shape)

    # Start training
    if config.batch_norm:
        collect_out = nn.layers.get_output(l_out, sym_x, deterministic=True,
                                           collect=True)
        f_collect = theano.function([sym_x], [collect_out])

    for epoch in range(num_epochs):
        if (epoch % 10) == 0:
            print "Epoch %d of %d" % (epoch + 1, num_epochs)

        if epoch in learning_rate_schedule:
            lr = np.float32(learning_rate_schedule[epoch])
            print "  setting learning rate to %.7f" % lr
            learning_rate.set_value(lr)
        if optimizer == "nag":
            if epoch in momentum_schedule:
                mu = np.float32(momentum_schedule[epoch])
                print "  setting momentum to %.7f" % mu
                momentum.set_value(mu)

        print "Shuffling data"
        seq_names = np.arange(0, data.num_seq_train)
        np.random.shuffle(seq_names)
        X_train = X_train[seq_names]
        y_train = y_train[seq_names]
        mask_train = mask_train[seq_names]

        num_batches = data.num_seq_train // batch_size
        losses = []
        preds = []
        norms = []
        for i in range(num_batches):
            idx = range(i * batch_size, (i + 1) * batch_size)
            x_batch = X_train[idx]
            y_batch = y_train[idx]
            mask_batch = mask_train[idx]
            loss, out, batch_norm = train(x_batch, y_batch, mask_batch)
            # print(batch_norm)
            norms.append(batch_norm)
            preds.append(out)
            losses.append(loss)

        predictions = np.concatenate(preds, axis=0)
        loss_train = np.mean(losses)
        all_losses_train.append(loss_train)

        acc_train = utils.proteins_acc(predictions,
                                       y_train[0:num_batches * batch_size],
                                       mask_train[0:num_batches * batch_size])
        all_accuracy_train.append(acc_train)

        mean_norm = np.mean(norms)
        all_mean_norm.append(mean_norm)

        print "  average training loss: %.5f" % loss_train
        print "  average training accuracy: %.5f" % acc_train
        print "  average norm: %.5f" % mean_norm

        # Validate every epoch (original guard: (i + 1) % config.validate_every == 0).
        # Collect batch-norm statistics over the training set before evaluating.
        if config.batch_norm:
            _ = f_collect(X_train)

        sets = [  # ('train', X_train, y_train, mask_train,
                  #  all_losses_eval_train, all_accuracy_eval_train),
                ('valid', X_valid, y_valid, mask_valid,
                 all_losses_eval_valid, all_accuracy_eval_valid),
                ('test', X_test, y_test, mask_test,
                 all_losses_eval_test, all_accuracy_eval_test)]
        for subset, X, y, mask, all_losses, all_accuracy in sets:
            print "  validating: %s loss" % subset
            preds = []
            losses = []  # reset so evaluation losses are not mixed with training ones
            num_batches = np.size(X, axis=0) // config.batch_size
            for i in range(num_batches):
                idx = range(i * batch_size, (i + 1) * batch_size)
                x_batch = X[idx]
                y_batch = y[idx]
                mask_batch = mask[idx]
                loss, out = eval(x_batch, y_batch, mask_batch)
                preds.append(out)
                losses.append(loss)

            predictions = np.concatenate(preds, axis=0)
            loss_eval = np.mean(losses)
            all_losses.append(loss_eval)
            acc_eval = utils.proteins_acc(predictions, y, mask)
            all_accuracy.append(acc_eval)

            print "  average evaluation loss (%s): %.5f" % (subset, loss_eval)
            print "  average evaluation accuracy (%s): %.5f" % (subset, acc_eval)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        # NOTE: time_since_start * num_epochs overestimates; the remaining-epochs
        # form (time_since_prev * (num_epochs - epoch)) used elsewhere is better.
        est_time_left = time_since_start * num_epochs
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

        if (epoch >= config.start_saving_at) and ((epoch % config.save_every) == 0):
            print "  saving parameters and metadata"
            with open(metadata_path + "-%d.pkl" % epoch, 'w') as f:
                pickle.dump({
                    'config_name': config_name,
                    'param_values': nn.layers.get_all_param_values(l_out),
                    'losses_train': all_losses_train,
                    'accuracy_train': all_accuracy_train,
                    'losses_eval_train': all_losses_eval_train,
                    'losses_eval_valid': all_losses_eval_valid,
                    'losses_eval_test': all_losses_eval_test,
                    'accuracy_eval_valid': all_accuracy_eval_valid,
                    'accuracy_eval_train': all_accuracy_eval_train,
                    'accuracy_eval_test': all_accuracy_eval_test,
                    'mean_norm': all_mean_norm,
                    'time_since_start': time_since_start,
                    'i': i,
                }, f, pickle.HIGHEST_PROTOCOL)
            print "  stored in %s" % metadata_path
            print
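# utils.proteins_acc is used throughout as a mask-aware accuracy. A plausible
# minimal version, assuming predictions are per-position class probabilities
# and mask marks real (non-padding) residues (assumed semantics, not the
# repo's exact code):

import numpy as np

def masked_accuracy(predictions, targets, mask):
    """Accuracy over unmasked positions only.

    predictions: (batch, seq_len, num_classes) probabilities
    targets:     (batch, seq_len) integer labels
    mask:        (batch, seq_len) 1.0 for real positions, 0.0 for padding
    """
    predicted_classes = np.argmax(predictions, axis=-1)
    correct = (predicted_classes == targets) * mask
    return float(np.sum(correct) / np.sum(mask))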
        cont_denom])
del outputs

now = time.time()
time_since_start = now - start_time
time_since_prev = now - prev_time
prev_time = now
est_time_left = time_since_start * \
    ((num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_ids[0]))
eta = datetime.datetime.now() + \
    datetime.timedelta(seconds=est_time_left)
eta_str = eta.strftime("%c")
print("  %s since start (%.2f s)" % (hms(time_since_start), time_since_prev))
print("  estimated %s to go (ETA: %s)\n" % (hms(est_time_left), eta_str))

# Save after every validate.
if (((e + 1) % save_every) == 0 or
        ((e + 1) % validate_every) == 0 or
        ((e + 1) == num_chunks_train)):
    print("\nSaving model ...")
    with open(dump_path, 'w') as f:
        pickle.dump({
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = MODEL_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print "  %s" % metadata_path
    print "To generate"
    print "  %s" % prediction_path

    print "Build model"
    interface_layers = config.build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values())
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)
    num_params = sum([np.prod(p.get_value().shape) for p in all_params])

    print string.ljust("  layer output shapes:", 34),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 30)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(int(num_param).__str__(), 10)
        num_size = string.ljust(np.prod(layer.output_shape[1:]).__str__(), 10)
        print "    %s %s %s %s" % (name, num_param, num_size, layer.output_shape)
    print "  number of parameters: %d" % num_params

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }
    idx = T.lscalar('idx')

    givens = dict()
    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx * config.batch_size:
                                                (idx + 1) * config.batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    print "Compiling..."
    iter_test = theano.function(
        [idx],
        network_outputs + theano_printer.get_the_stuff_to_print(),
        givens=givens, on_unused_input="ignore",
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    required_input = {
        key: l_in.output_shape
        for (key, l_in) in input_layers.iteritems()
    }

    print "Preparing dataloaders"
    config.test_data.prepare()
    chunk_size = config.batches_per_chunk * config.batch_size

    test_data_generator = buffering.buffered_gen_threaded(
        config.test_data.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output={},
        ))

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])

    chunks_test_idcs = itertools.count(0)
    num_chunks_test = math.ceil(1.0 * config.test_data.epochs *
                                config.test_data.number_of_samples /
                                (config.batch_size * config.batches_per_chunk))

    start_time, prev_time = None, None
    all_predictions = dict()

    print "Loading first chunks"
    for e, test_data in izip(chunks_test_idcs, test_data_generator):
        if start_time is None:
            start_time = time.time()
            prev_time = start_time

        print
        print "Chunk %d/%d" % (e + 1, num_chunks_test)
        print "=============="

        if config.dump_network_loaded_data:
            pickle.dump(test_data, open("data_loader_dump_test_%d.pkl" % e, "wb"))

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])

        sample_ids = test_data[IDS]
        for b in xrange(config.batches_per_chunk):
            th_result = iter_test(b)
            predictions = th_result[:len(network_outputs)]

            for output_idx, key in enumerate(output_layers.keys()):
                for sample_idx in xrange(b * config.batch_size,
                                         (b + 1) * config.batch_size):
                    prediction_pos = sample_idx % config.batch_size
                    sample_id = sample_ids[sample_idx]
                    if sample_id is not None:
                        if sample_id not in all_predictions:
                            all_predictions[sample_id] = dict()
                        if key not in all_predictions[sample_id]:
                            all_predictions[sample_id][key] = \
                                predictions[output_idx][prediction_pos]
                        else:
                            all_predictions[sample_id][key] = np.concatenate(
                                (all_predictions[sample_id][key],
                                 predictions[output_idx][prediction_pos]),
                                axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        print "  %s since start (+%.2f s)" % (utils.hms(time_since_start),
                                              time_since_prev)
        try:
            if num_chunks_test:
                est_time_left = time_since_start * (float(num_chunks_test - (e + 1)) /
                                                    float(e + 1))
                eta = datetime.datetime.now() + datetime.timedelta(seconds=est_time_left)
                eta_str = eta.strftime("%c")
                print "  estimated %s to go" % utils.hms(est_time_left)
                print "  (ETA: %s)" % eta_str
        except OverflowError:
            print "  This will take really long, like REALLY long."

        print "  %dms per testing sample" % (
            1000. * time_since_start /
            ((e + 1) * config.batch_size * config.batches_per_chunk))

    with open(prediction_path, 'w') as f:
        pickle.dump({
            'metadata_path': metadata_path,
            'prediction_path': prediction_path,
            'configuration_file': config.__name__,
            'git_revision_hash': utils.get_git_revision_hash(),
            'experiment_id': expid,
            'predictions': all_predictions,
        }, f, pickle.HIGHEST_PROTOCOL)
    print "  saved to %s" % prediction_path
    print

    return
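# buffering.buffered_gen_threaded shows up in every loop above to overlap
# data loading with GPU work. A minimal sketch of such a helper, assuming it
# prefetches from the wrapped generator in a background thread (the actual
# implementation may differ in buffer size and termination details):

import threading
try:
    import Queue as queue  # Python 2
except ImportError:
    import queue           # Python 3

def buffered_gen_threaded(source_gen, buffer_size=2):
    """Prefetch items from source_gen in a background thread so the consumer
    (e.g. the GPU loop) does not block on data loading."""
    buf = queue.Queue(maxsize=buffer_size)
    sentinel = object()

    def producer():
        for item in source_gen:
            buf.put(item)
        buf.put(sentinel)  # signal exhaustion to the consumer

    thread = threading.Thread(target=producer)
    thread.daemon = True
    thread.start()

    while True:
        item = buf.get()
        if item is sentinel:
            return
        yield item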