def test_targets(self):
    ds = Dataset(sample_path)
    tmp = [[a, b, e]
           for a, b, c, d, e in itertools.islice(
               ds.images_and_targets(resize=False), nb6)]
    scans, slices, targets = zip(*tmp)
    sums = [t.sum() for t in targets]
    self.assertEqual(scans[0], 'ANON_LUNG_TC006')
    self.assertEqual(sums[:93], [0] * 93)
    self.assertEqual(sums[93:99], [50., 123., 195., 192., 147., 41.])
    self.assertEqual(sums[99:], [0] * (nb6 - 99))
def test_filter_out_blank(self):
    ds = Dataset(sample_path)
    tmp = [[a, b, d]
           for a, b, c, d in itertools.islice(
               ds.features_and_targets(resize=False, filterBlank=True), 7)]
    scans, slices, targets = zip(*tmp)
    sums = [t.sum() for t in targets]
    self.assertEqual(scans[0], 'ANON_LUNG_TC006')
    self.assertEqual(scans[6], 'ANON_LUNG_TC002')
    self.assertEqual(sums[:6], [50., 123., 195., 192., 147., 41.])
    self.assertEqual(sums[6], 320.)
def test_features_and_binary_target(self):
    #self.assertEqual(scan[200], 'ANON_LUNG_TC006')
    #scan1 = [0]*144
    #scan1[62:73] = [1]*11
    scan6 = [0] * nb6
    scan6[93 - 1:99 - 1] = [1] * 6
    scan2 = [0] * nb2
    scan2[65 - 1:77 - 1] = [1] * 12
    ds = Dataset(sample_path)
    tmp = [[a, b, d]
           for a, b, c, d in itertools.islice(
               ds.features_and_binary_targets(), nb6 + nb2)]
    scan, slice, res = zip(*tmp)
    # Sanity checks for the rest of the tests,
    # but order shouldn't matter.
    # TODO: more flexible
    self.assertEqual(scan[0], 'ANON_LUNG_TC006')
    self.assertEqual(scan[nb6], 'ANON_LUNG_TC002')
    self.assertEqual(slice[nb6], 2)
    self.assertEqual(slice[nb6 + 1], 3)
    self.assertSequenceEqual(res[:nb6], scan6)
    self.assertSequenceEqual(res[nb6:], scan2)
def test_batch_of_binary(self):
    ds = Dataset(sample_path)
    _, targets = stack([c, d] for a, b, c, d in itertools.islice(
        ds.features_and_binary_targets(), 4))
    self.assertEqual(targets.shape, (4, ))
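# `stack` (used in test_batch_of_binary above) is a helper defined elsewhere in
# the repo. The version below is only a minimal sketch of the assumed behaviour,
# turning an iterable of (feature, target) pairs into two stacked numpy arrays;
# it is not the project's actual implementation.
import numpy as np


def stack_sketch(pairs):
    """Split (feature, target) pairs into a stacked feature array and a target vector."""
    features, targets = zip(*pairs)
    return np.stack(features), np.asarray(targets)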
from sklearn.externals import joblib

#from helpers.contours import read_coords
from helpers.input_data import Dataset
from helpers.contours import pixelToMM, merge_contours_naive, cv2tolist
from helpers.misc import makebox, display, contrast, flatten
from extract_features import contour_extractors
from extract_features import features_from_contour

if __name__ == '__main__':

    # dataset = Dataset("/home/gerey/hms_lung/data/provisional_extracted_no_gt", withGT=False)
    dataset = Dataset("/home/gerey/hms_lung/data/example_extracted_valid", withGT=False)

    precision_model = "/home/gerey/hms_lung/data/example_extracted/precision_randomforest4.clf"
    recall_model = "/home/gerey/hms_lung/data/example_extracted/recall_randomforest4.clf"
    precision_clf = joblib.load(precision_model)
    recall_clf = joblib.load(recall_model)

    def compute(params):
        scan_id, slice, nbslices, img, aux = params
        print(scan_id, slice)
        res = []
        for idx, contour_extractor in enumerate(contour_extractors):
            for cnt in contour_extractor(img):
                if len(cnt) >= 5:  # needed for fitEllipse
                    features = [idx, slice, slice / nbslices] + features_from_contour(cnt, img)
# collect some stats
from itertools import islice

from helpers.input_data import Dataset


def get_bounding_box_stats(dataset):
    collect = []
    for scan_id, slice_idx, contour in dataset.get_contours():
        lft, top = contour.min(axis=(0, 1))
        rgt, bot = contour.max(axis=(0, 1))
        collect.append((scan_id, slice_idx, top, bot, lft, rgt))
    print("min top: %s" % str(min(collect, key=lambda x: x[2])))
    print("max bot: %s" % str(max(collect, key=lambda x: x[3])))
    print("min left: %s" % str(min(collect, key=lambda x: x[4])))
    print("max right: %s" % str(max(collect, key=lambda x: x[5])))


if __name__ == '__main__':
    dataset = Dataset("/home/gerey/hms_lung/data/example_extracted")
    get_bounding_box_stats(dataset)
import numpy
import tensorflow as tf

from helpers.input_data import Dataset
from trainer import PIXEL_DEPTH, IMAGE_SIZE, TARGET_SIZE, NUM_CHANNELS, model

dataset = Dataset("/home/gerey/hms_lung/data/example_extracted_sample")

decode_data_node = tf.placeholder(
    tf.float32,
    shape=(1, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
    name="yg2_input_decoding")
#train_labels_node = tf.placeholder(
#    tf.float32,
#    shape=(1, TARGET_SIZE, TARGET_SIZE))

with tf.Session() as sess:
    batch_data, batch_labels = dataset.next_batch(1)
    # TODO: what if datatype() == tf.float16 ?
    batch_data = batch_data.astype(numpy.float32) / PIXEL_DEPTH - 0.5
    # This dictionary maps the batch data (as a numpy array) to the
    # node in the graph it should be fed to.
    feed_dict = {decode_data_node: batch_data}
    #             train_labels_node: batch_labels}

    # Trying to rebuild a saved graph doesn't seem to work:
    #new_saver = tf.train.import_meta_graph('/home/gerey/hms_lung/models/no-weights-regul-0.meta')
    #forward_op = tf.get_collection('mon_decoder')[0]
def enhance(sol, dataset):
    for scan in dataset.scans():
        print(scan.id())
        scan3D = scan.scan3D()
        sol3D = make_sol3d(sol, scan)
        enhance_scan(sol3D, scan3D)
        for idx, slice in enumerate(sol3D):
            cnts = findContours(slice)
            for cnt in cnts:
                coords = flatten(
                    pixelToMM(scan.aux[idx + 1], x, y)
                    for (x, y) in cv2tolist(cnt))
                yield [scan.id(), idx + 1] + ["%.4f" % xy for xy in coords]


if __name__ == '__main__':
    dataset = Dataset("/home/gerey/hms_lung/data/extract", withGT=False)
    csvpath = "/home/gerey/hms_lung/single.csv"
    output = "/home/gerey/hms_lung/single_enhanced2.csv"
    # dataset = Dataset("/home/gerey/hms_lung/data/provisional_extracted_no_gt", withGT=False)
    # csvpath = "/home/gerey/hms_lung/predictions27.csv"
    # output = "/home/gerey/hms_lung/enhanced27.csv"

    sol = solution2dict(csvpath)
    with open(output, "w") as f:
        for s in enhance(sol, dataset):
            f.write(",".join(map(str, s)))
            f.write("\n")
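# `solution2dict` is defined elsewhere in the repo. The sketch below only
# illustrates the assumed CSV layout written by `enhance` above
# (scan_id, slice_index, x1, y1, x2, y2, ...) and how such rows could be
# grouped by scan and slice; it is not the project's actual implementation.
import csv
from collections import defaultdict


def solution2dict_sketch(csvpath):
    """Group contour coordinate rows by (scan_id, slice_index)."""
    sol = defaultdict(list)
    with open(csvpath) as f:
        for row in csv.reader(f):
            scan_id, slice_idx, *coords = row
            sol[(scan_id, int(slice_idx))].append([float(c) for c in coords])
    return sol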
def main(_):

    # Data provider.
    train_data = Dataset("/home/gerey/hms_lung/data/example_extracted")
    #train_data = Dataset("/home/gerey/hms_lung/data/example_extracted_sample")

    num_epochs = NUM_EPOCHS
    train_size = train_data.nb_scans() * 100  # Approximation, never mind

    # This is where training samples and labels are fed to the graph.
    # These placeholder nodes will be fed a batch of training data at each
    # training step using the {feed_dict} argument to the Run() call below.
    train_data_node = tf.placeholder(
        data_type(),
        shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
        name="training_samples")
    train_labels_node = tf.placeholder(
        data_type(),
        shape=(BATCH_SIZE, TARGET_SIZE, TARGET_SIZE),
        name="training_labels")
    variable_summaries(train_data_node, "input")
    variable_summaries(train_labels_node, "target")

    #eval_data = tf.placeholder(
    #    data_type(),
    #    shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
    #decode_node = tf.placeholder(
    #    data_type(),
    #    shape=(DECODING_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
    #    name="ygy_input_decoding")

    # Training computation: L2 loss.
    # TODO: better loss function!
    pred = model(train_data_node, True)
    #loss = tf.reduce_mean(tf.nn.l2_loss(logits, train_labels_node))
    loss = tf.reduce_mean(tf.square(pred - train_labels_node), name="loss")

    # L2 regularization for the fully connected parameters.
    regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                    tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
    # Add the regularization term to the loss.
    loss += 5e-7 * regularizers

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    batch = tf.Variable(0, dtype=data_type())
    # Decay once per epoch, using an exponential schedule starting at 0.01.
    learning_rate = tf.train.exponential_decay(
        0.01,                # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        train_size,          # Decay step.
        0.95,                # Decay rate.
        staircase=True)
    # Use simple momentum for the optimization.
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(
        loss, global_step=batch)

    # Predictions for the current training minibatch.
    # train_prediction = tf.nn.softmax(logits)
    train_prediction = pred
    variable_summaries(train_prediction, "prediction")

    # Predictions for the test and validation, which we'll compute less often.
    # eval_prediction = tf.nn.softmax(model(eval_data))
    # eval_prediction = model(eval_data)

    # Small utility function to evaluate a dataset by feeding batches of data to
    # {eval_data} and pulling the results from {eval_predictions}.
    # Saves memory and enables this to run on smaller GPUs.
    def eval_in_batches(data, sess):
        """Get all predictions for a dataset by running it in small batches."""
        size = data.shape[0]
        if size < EVAL_BATCH_SIZE:
            raise ValueError("batch size for evals larger than dataset: %d" % size)
        predictions = numpy.ndarray(shape=(size, NUM_LABELS), dtype=numpy.float32)
        for begin in range(0, size, EVAL_BATCH_SIZE):
            end = begin + EVAL_BATCH_SIZE
            if end <= size:
                predictions[begin:end, :] = sess.run(
                    eval_prediction,
                    feed_dict={eval_data: data[begin:end, ...]})
            else:
                batch_predictions = sess.run(
                    eval_prediction,
                    feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]})
                predictions[begin:, :] = batch_predictions[begin - size:, :]
        return predictions

    # check_op = tf.add_check_numerics_ops()  # To check for NaN
    check_op = None

    # Create a saver.
    saver = tf.train.Saver()  # Defaults to saving all savable objects

    # Remember the op we want to run by adding it to a collection.
    #tf.add_to_collection('mon_decoder', model(decode_node))

    # Create a local session to run the training.
    start_time = time.time()
    with tf.Session() as sess:
        # Merge all the summaries.
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter('/home/gerey/hms_lung/log2/',
                                             sess.graph)

        # Run all the initializers to prepare the trainable parameters.
        tf.global_variables_initializer().run()
        print('Initialized!')

        # Loop through training steps.
        train_size = 10000  # TODO
        it = itertools.cycle(
            batcher(
                ((f, t) for _, _, f, t in train_data.features_and_targets()),
                BATCH_SIZE))
        for step in range(int(num_epochs * train_size) // BATCH_SIZE):
            batch_data, batch_labels = next(it)
            # TODO: what if datatype() == tf.float16 ?
            batch_data = batch_data.astype(numpy.float32) / PIXEL_DEPTH - 0.5

            if check_op is not None:
                assert not numpy.isnan(batch_data.sum())
                assert not numpy.isnan(batch_labels.sum())

            # This dictionary maps the batch data (as a numpy array) to the
            # node in the graph it should be fed to.
            feed_dict = {
                train_data_node: batch_data,
                train_labels_node: batch_labels
            }

            # Run the optimizer to update weights.
            if check_op is not None:
                sess.run([optimizer, check_op], feed_dict=feed_dict)
            else:
                _, summary = sess.run([optimizer, merged], feed_dict=feed_dict)
                train_writer.add_summary(summary, step)

            # Save a checkpoint, which by default also exports a meta_graph.
            if step % 100 == 0:
                saver.save(
                    sess,
                    '/home/gerey/hms_lung/models_coefs/2-proper-samples/',
                    global_step=step)
                print("Saved step %d" % (step, ))

            # Print some extra information once we reach the evaluation frequency.
            if step % EVAL_FREQUENCY == 0:
                # Fetch some extra nodes' data.
                if check_op is not None:
                    l, lr, predictions, _ = sess.run(
                        [loss, learning_rate, train_prediction, check_op],
                        feed_dict=feed_dict)
                else:
                    l, lr, predictions = sess.run(
                        [loss, learning_rate, train_prediction],
                        feed_dict=feed_dict)
                elapsed_time = time.time() - start_time
                start_time = time.time()
                print('Step %d (epoch %.2f), %.1f ms' %
                      (step, float(step) * BATCH_SIZE / train_size,
                       1000 * elapsed_time / EVAL_FREQUENCY))
                print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
                print('Minibatch error: %.1f%%' %
                      error_rate(predictions, batch_labels))
                # print('Validation error: %.1f%%' % error_rate(
                #     eval_in_batches(validation_data, sess), validation_labels))
                sys.stdout.flush()
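# `batcher` is used above (and again in the FCN trainer further down) but its
# definition lives elsewhere in the repo. The helper below is only a minimal
# sketch of the assumed behaviour, grouping an iterable of (feature, target)
# pairs into fixed-size numpy batches; it is not the project's actual code.
import itertools

import numpy as np


def batcher_sketch(pairs, batch_size):
    """Yield (features, targets) arrays built from batch_size consecutive pairs."""
    it = iter(pairs)
    while True:
        chunk = list(itertools.islice(it, batch_size))
        if len(chunk) < batch_size:
            return  # drop a trailing incomplete batch
        features, targets = zip(*chunk)
        yield np.stack(features), np.stack(targets)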
    kept = []
    for cnt in contours:
        for kp in keypoints:
            if cv2.pointPolygonTest(cnt, kp.pt, False) >= 0:
                kept.append(cnt)
                break
    return [cv2tolist(cnt) for cnt in kept]
    # Now get convex hull (more likely); currently unreachable alternative:
    return [cv2tolist(cv2.convexHull(cnt, clockwise=False)) for cnt in kept]


if __name__ == '__main__':
    #dataset = Dataset("/home/gerey/hms_lung/data/provisional_extracted_no_gt", withGT=False)
    dataset = Dataset("/home/gerey/hms_lung/data/example_extracted_valid", withGT=False)
    #dataset = Dataset("/home/gerey/hms_lung/data/extract", withGT=False)

    scan_index = 1
    solutions = []
    for scan in dataset.scans():
        for id, img, aux in scan.images_aux():
            scan_id = scan.id()
            # Only process middle slices.
            # TODO: expand once false negatives are cleared
            if scan.nb_slices() / 4 < id < scan.nb_slices() * 1.7 / 3:
                print(scan_id, id, len(slices))
                contours = contouring_binsym(img)
                for contour in contours:
                    coords = flatten(
                        pixelToMM(aux[id], x, y) for (x, y) in contour)
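# `contours` and `keypoints` come from earlier in the enclosing function, which
# is not shown in this fragment. The sketch below only illustrates one plausible
# way such keypoints could be produced with OpenCV's blob detector; the name,
# parameters, and thresholds are assumptions, not the project's actual code.
import cv2


def detect_blob_keypoints_sketch(gray_img):
    """Return blob keypoints whose .pt can be tested against contours."""
    params = cv2.SimpleBlobDetector_Params()
    params.filterByArea = True
    params.minArea = 20
    detector = cv2.SimpleBlobDetector_create(params)
    return detector.detect(gray_img)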
    pool = multiprocessing.Pool(10)
    return flatten(pool.imap(compute_features, iter))


def main(dataset, output):
    # it = iter(generate_features(dataset))
    # with open(output, "w") as f:
    #     for _ in range(2):
    #         print(" ".join(map(str, next(it))), file=f)
    with open(output, "w") as f:
        for features in generate_features(dataset):
            print(" ".join(map(str, features)), file=f)


if __name__ == '__main__':
    output = "/home/gerey/hms_lung/data/example_extracted/features6.ssv"
    train = Dataset("/home/gerey/hms_lung/data/example_extracted")
    train_iter = train.images_and_targets()
    if 1:
        valid = Dataset("/home/gerey/hms_lung/data/example_extracted_valid")
        valid_iter = valid.images_and_targets()
        full_iter = itertools.chain(train_iter, valid_iter)
    else:
        full_iter = train_iter
    main(full_iter, output)
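# The .ssv written above is plain space-separated rows of numbers. A file like
# that can be fed back into scikit-learn to train classifiers such as the
# precision/recall random forests loaded in the prediction script earlier. The
# snippet below is only an illustrative sketch; the column layout and the label
# position are assumptions, not the project's actual format or training code.
import numpy as np
from sklearn.ensemble import RandomForestClassifier


def train_forest_sketch(ssv_path):
    """Train a random forest from a space-separated feature file (assumed layout)."""
    data = np.loadtxt(ssv_path)        # one feature row per contour
    X, y = data[:, :-1], data[:, -1]   # assume the label is the last column
    clf = RandomForestClassifier(n_estimators=100)
    clf.fit(X, y)
    return clf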
def main(argv=None):
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3],
                           name="input_image")
    annotation = tf.placeholder(tf.uint8,
                                shape=[None, IMAGE_SIZE, IMAGE_SIZE],
                                name="annotation")

    pred_annotation, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    #tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation",
                     tf.cast(pred_annotation, tf.uint8),
                     max_outputs=2)
    loss = tf.reduce_mean((
        tf.nn.weighted_cross_entropy_with_logits(
            logits=logits,
            # Class 1 = tumor
            targets=tf.one_hot(annotation, NUM_OF_CLASSESS),
            name="entropy",
            pos_weight=10)))
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    print("Setting up dataset reader")
    # image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    image_options = {'resize': False, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset = Dataset("/home/gerey/hms_lung/data/example_extracted")
    validation_dataset = Dataset(
        "/home/gerey/hms_lung/data/example_extracted_valid")

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.initialize_all_variables())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    valid_it = itertools.cycle(
        batcher(((f, t)
                 for _, _, f, t in validation_dataset.features_and_targets(
                     resize=False, filterBlank=True)), FLAGS.batch_size))

    if FLAGS.mode == "train":
        train_it = itertools.cycle(
            batcher(((f, t)
                     for _, _, f, t in train_dataset.features_and_targets(
                         resize=False, filterBlank=True)), FLAGS.batch_size))
        for itr in range(MAX_ITERATION):
            train_images, train_annotations = next(train_it)
            #train_annotations = np.expand_dims(train_annotations, axis=3)
            feed_dict = {
                image: train_images,
                annotation: train_annotations,
                keep_probability: 0.85
            }

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, summary_op],
                                                   feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                summary_writer.add_summary(summary_str, itr)

            if itr % 500 == 0:
                valid_images, valid_annotations = next(valid_it)
                # valid_annotations = np.expand_dims(valid_annotations, axis=3)
                valid_loss = sess.run(loss,
                                      feed_dict={
                                          image: valid_images,
                                          annotation: valid_annotations,
                                          keep_probability: 1.0
                                      })
                print("%s ---> Validation_loss: %g" %
                      (datetime.datetime.now(), valid_loss))
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

    elif FLAGS.mode == "visualize":
        vizu_it = batcher(
            ((f, t)
             for _, _, f, t in validation_dataset.features_and_targets(
                 resize=False, filterBlank=True)), 1)
        #for itr in range(FLAGS.batch_size):
        for itr in range(100):
            valid_images, valid_annotations = next(vizu_it)  # FIXME: get random_batch ?
            # Leftover from the original FCN code, kept for reference:
            # valid_images, valid_annotations = validation_dataset_reader.get_random_batch(FLAGS.batch_size)
            #valid_annotations = np.expand_dims(valid_annotations, axis=3)
            pred = sess.run(pred_annotation,
                            feed_dict={
                                image: valid_images,
                                annotation: valid_annotations,
                                keep_probability: 1.0
                            })
            #valid_annotations = np.squeeze(valid_annotations, axis=3)
            pred = np.squeeze(pred, axis=3)

            #utils.save_image(valid_images[0].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr))
            #utils.save_image(valid_annotations[0].astype(np.uint8)*255, FLAGS.logs_dir, name="gt_" + str(5+itr))
            utils.save_image(pred[0].astype(np.uint8) * 255,
                             FLAGS.logs_dir,
                             name="pred_" + str(5 + itr))
            print("Saved image: %d" % itr)
def gen_params():
    for scan in dataset.scans():
        scan3D = scan.scan3D()
        if USE_LUNG:
            lung = segment_lung_mask(scan3D, fill_lung_structures=True)
            if MAX_MASK:
                lung = np.any(lung, axis=0)
        else:
            lung = None
        for scan_id, slice_idx, aux in scan.gen_aux():
            yield scan_id, slice_idx, scan.nb_slices(), aux, scan3D, lung


if __name__ == '__main__':
    # dataset = Dataset("/home/gerey/hms_lung/data/provisional_extracted_no_gt", withGT=False)
    dataset = Dataset(
        "/home/gerey/hms_lung/data/example_extracted_valid_small2",
        withGT=False)

    pool = multiprocessing.Pool(30)
    solutions = flatten(pool.imap(compute, gen_params()))
    # solutions = flatten(map(compute, gen_params()))

    with open("/home/gerey/hms_lung/exemple_predictions_sample2_33.csv", "w") as f:
    # with open("/home/gerey/hms_lung/predictions31.csv", "w") as f:
        for s in solutions:
            f.write(",".join(map(str, s)))
            f.write("\n")