def test_targets(self):
    ds = Dataset(sample_path)
    tmp = [[a, b, e]
           for a, b, c, d, e in itertools.islice(
               ds.images_and_targets(resize=False), nb6)]
    scans, slices, targets = zip(*tmp)
    sums = [t.sum() for t in targets]
    self.assertEqual(scans[0], 'ANON_LUNG_TC006')
    self.assertEqual(sums[:93], [0] * 93)
    self.assertEqual(sums[93:99], [50., 123., 195., 192., 147., 41.])
    self.assertEqual(sums[99:], [0] * (nb6 - 99))
def test_filter_out_blank(self):
    ds = Dataset(sample_path)
    tmp = [[a, b, d]
           for a, b, c, d in itertools.islice(
               ds.features_and_targets(resize=False, filterBlank=True), 7)]
    scans, slices, targets = zip(*tmp)
    sums = [t.sum() for t in targets]
    self.assertEqual(scans[0], 'ANON_LUNG_TC006')
    self.assertEqual(scans[6], 'ANON_LUNG_TC002')
    self.assertEqual(sums[:6], [50., 123., 195., 192., 147., 41.])
    self.assertEqual(sums[6], 320.)
def test_features_and_binary_target(self):
    #self.assertEqual(scan[200], 'ANON_LUNG_TC006')
    #scan1 = [0]*144
    #scan1[62:73] = [1]*11
    scan6 = [0] * nb6
    scan6[93 - 1:99 - 1] = [1] * 6
    scan2 = [0] * nb2
    scan2[65 - 1:77 - 1] = [1] * 12
    ds = Dataset(sample_path)
    tmp = [[a, b, d]
           for a, b, c, d in itertools.islice(
               ds.features_and_binary_targets(), nb6 + nb2)]
    scan, slice, res = zip(*tmp)
    # Sanity checks for the rest of the tests,
    # but order shouldn't matter.
    # TODO: more flexible
    self.assertEqual(scan[0], 'ANON_LUNG_TC006')
    self.assertEqual(scan[nb6], 'ANON_LUNG_TC002')
    self.assertEqual(slice[nb6], 2)
    self.assertEqual(slice[nb6 + 1], 3)
    self.assertSequenceEqual(res[:nb6], scan6)
    self.assertSequenceEqual(res[nb6:], scan2)
def test_batch_of_binary(self):
    ds = Dataset(sample_path)
    _, targets = stack([c, d] for a, b, c, d in itertools.islice(
        ds.features_and_binary_targets(), 4))
    self.assertEqual(targets.shape, (4, ))
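# `stack` (used in test_batch_of_binary above) is a helper defined elsewhere in
# the repo. The version below is only a minimal sketch of the assumed behaviour,
# turning an iterable of (feature, target) pairs into two stacked numpy arrays;
# it is not the project's actual implementation.
import numpy as np


def stack_sketch(pairs):
    """Split (feature, target) pairs into a stacked feature array and a target vector."""
    features, targets = zip(*pairs)
    return np.stack(features), np.asarray(targets)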
from sklearn.externals import joblib

#from helpers.contours import read_coords
from helpers.input_data import Dataset
from helpers.contours import pixelToMM, merge_contours_naive, cv2tolist
from helpers.misc import makebox, display, contrast, flatten
from extract_features import contour_extractors
from extract_features import features_from_contour

if __name__ == '__main__':

    # dataset = Dataset("/home/gerey/hms_lung/data/provisional_extracted_no_gt", withGT=False)
    dataset = Dataset("/home/gerey/hms_lung/data/example_extracted_valid", withGT=False)

    precision_model = "/home/gerey/hms_lung/data/example_extracted/precision_randomforest4.clf"
    recall_model = "/home/gerey/hms_lung/data/example_extracted/recall_randomforest4.clf"
    precision_clf = joblib.load(precision_model)
    recall_clf = joblib.load(recall_model)

    def compute(params):
        scan_id, slice, nbslices, img, aux = params
        print(scan_id, slice)
        res = []
        for idx, contour_extractor in enumerate(contour_extractors):
            for cnt in contour_extractor(img):
                if len(cnt) >= 5:  # needed for fitEllipse
                    features = [idx, slice, slice / nbslices] + features_from_contour(cnt, img)
# collect some stats
from itertools import islice

from helpers.input_data import Dataset


def get_bounding_box_stats(dataset):
    collect = []
    for scan_id, slice_idx, contour in dataset.get_contours():
        lft, top = contour.min(axis=(0, 1))
        rgt, bot = contour.max(axis=(0, 1))
        collect.append((scan_id, slice_idx, top, bot, lft, rgt))
    print("min top: %s" % str(min(collect, key=lambda x: x[2])))
    print("max bot: %s" % str(max(collect, key=lambda x: x[3])))
    print("min left: %s" % str(min(collect, key=lambda x: x[4])))
    print("max right: %s" % str(max(collect, key=lambda x: x[5])))


if __name__ == '__main__':
    dataset = Dataset("/home/gerey/hms_lung/data/example_extracted")
    get_bounding_box_stats(dataset)
import numpy
import tensorflow as tf

from helpers.input_data import Dataset
from trainer import PIXEL_DEPTH, IMAGE_SIZE, TARGET_SIZE, NUM_CHANNELS, model

dataset = Dataset("/home/gerey/hms_lung/data/example_extracted_sample")

decode_data_node = tf.placeholder(
    tf.float32,
    shape=(1, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
    name="yg2_input_decoding")
#train_labels_node = tf.placeholder(
#    tf.float32,
#    shape=(1, TARGET_SIZE, TARGET_SIZE))

with tf.Session() as sess:
    batch_data, batch_labels = dataset.next_batch(1)
    # TODO: what if datatype() == tf.float16 ?
    batch_data = batch_data.astype(numpy.float32) / PIXEL_DEPTH - 0.5
    # This dictionary maps the batch data (as a numpy array) to the
    # node in the graph it should be fed to.
    feed_dict = {decode_data_node: batch_data}
    #             train_labels_node: batch_labels}

    # Trying to rebuild a saved graph doesn't seem to work:
    #new_saver = tf.train.import_meta_graph('/home/gerey/hms_lung/models/no-weights-regul-0.meta')
    #forward_op = tf.get_collection('mon_decoder')[0]
def enhance(sol, dataset):
    for scan in dataset.scans():
        print(scan.id())
        scan3D = scan.scan3D()
        sol3D = make_sol3d(sol, scan)
        enhance_scan(sol3D, scan3D)
        for idx, slice in enumerate(sol3D):
            cnts = findContours(slice)
            for cnt in cnts:
                coords = flatten(
                    pixelToMM(scan.aux[idx + 1], x, y)
                    for (x, y) in cv2tolist(cnt))
                yield [scan.id(), idx + 1] + ["%.4f" % xy for xy in coords]


if __name__ == '__main__':
    dataset = Dataset("/home/gerey/hms_lung/data/extract", withGT=False)
    csvpath = "/home/gerey/hms_lung/single.csv"
    output = "/home/gerey/hms_lung/single_enhanced2.csv"
    # dataset = Dataset("/home/gerey/hms_lung/data/provisional_extracted_no_gt", withGT=False)
    # csvpath = "/home/gerey/hms_lung/predictions27.csv"
    # output = "/home/gerey/hms_lung/enhanced27.csv"

    sol = solution2dict(csvpath)
    with open(output, "w") as f:
        for s in enhance(sol, dataset):
            f.write(",".join(map(str, s)))
            f.write("\n")
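# `solution2dict` is defined elsewhere in the repo. The sketch below only
# illustrates the assumed CSV layout written by `enhance` above
# (scan_id, slice_index, x1, y1, x2, y2, ...) and how such rows could be
# grouped by scan and slice; it is not the project's actual implementation.
import csv
from collections import defaultdict


def solution2dict_sketch(csvpath):
    """Group contour coordinate rows by (scan_id, slice_index)."""
    sol = defaultdict(list)
    with open(csvpath) as f:
        for row in csv.reader(f):
            scan_id, slice_idx, *coords = row
            sol[(scan_id, int(slice_idx))].append([float(c) for c in coords])
    return sol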
def main(_):

    # Data provider.
    train_data = Dataset("/home/gerey/hms_lung/data/example_extracted")
    #train_data = Dataset("/home/gerey/hms_lung/data/example_extracted_sample")

    num_epochs = NUM_EPOCHS
    train_size = train_data.nb_scans() * 100  # Approximation, never mind

    # This is where training samples and labels are fed to the graph.
    # These placeholder nodes will be fed a batch of training data at each
    # training step using the {feed_dict} argument to the Run() call below.
    train_data_node = tf.placeholder(
        data_type(),
        shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
        name="training_samples")
    train_labels_node = tf.placeholder(
        data_type(),
        shape=(BATCH_SIZE, TARGET_SIZE, TARGET_SIZE),
        name="training_labels")
    variable_summaries(train_data_node, "input")
    variable_summaries(train_labels_node, "target")

    #eval_data = tf.placeholder(
    #    data_type(),
    #    shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
    #decode_node = tf.placeholder(
    #    data_type(),
    #    shape=(DECODING_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
    #    name="ygy_input_decoding")

    # Training computation: L2 loss.
    # TODO: better loss function!
    pred = model(train_data_node, True)
    #loss = tf.reduce_mean(tf.nn.l2_loss(logits, train_labels_node))
    loss = tf.reduce_mean(tf.square(pred - train_labels_node), name="loss")

    # L2 regularization for the fully connected parameters.
    regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                    tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
    # Add the regularization term to the loss.
    loss += 5e-7 * regularizers

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    batch = tf.Variable(0, dtype=data_type())
    # Decay once per epoch, using an exponential schedule starting at 0.01.
    learning_rate = tf.train.exponential_decay(
        0.01,                # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        train_size,          # Decay step.
        0.95,                # Decay rate.
        staircase=True)
    # Use simple momentum for the optimization.
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(
        loss, global_step=batch)

    # Predictions for the current training minibatch.
    # train_prediction = tf.nn.softmax(logits)
    train_prediction = pred
    variable_summaries(train_prediction, "prediction")

    # Predictions for the test and validation, which we'll compute less often.
    # eval_prediction = tf.nn.softmax(model(eval_data))
    # eval_prediction = model(eval_data)

    # Small utility function to evaluate a dataset by feeding batches of data to
    # {eval_data} and pulling the results from {eval_predictions}.
    # Saves memory and enables this to run on smaller GPUs.
    def eval_in_batches(data, sess):
        """Get all predictions for a dataset by running it in small batches."""
        size = data.shape[0]
        if size < EVAL_BATCH_SIZE:
            raise ValueError("batch size for evals larger than dataset: %d" % size)
        predictions = numpy.ndarray(shape=(size, NUM_LABELS), dtype=numpy.float32)
        for begin in range(0, size, EVAL_BATCH_SIZE):
            end = begin + EVAL_BATCH_SIZE
            if end <= size:
                predictions[begin:end, :] = sess.run(
                    eval_prediction,
                    feed_dict={eval_data: data[begin:end, ...]})
            else:
                batch_predictions = sess.run(
                    eval_prediction,
                    feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]})
                predictions[begin:, :] = batch_predictions[begin - size:, :]
        return predictions

    # check_op = tf.add_check_numerics_ops()  # To check for NaN
    check_op = None

    # Create a saver.
    saver = tf.train.Saver()  # Defaults to saving all savable objects

    # Remember the op we want to run by adding it to a collection.
    #tf.add_to_collection('mon_decoder', model(decode_node))

    # Create a local session to run the training.
    start_time = time.time()
    with tf.Session() as sess:
        # Merge all the summaries.
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter('/home/gerey/hms_lung/log2/',
                                             sess.graph)

        # Run all the initializers to prepare the trainable parameters.
        tf.global_variables_initializer().run()
        print('Initialized!')

        # Loop through training steps.
        train_size = 10000  # TODO
        it = itertools.cycle(
            batcher(
                ((f, t) for _, _, f, t in train_data.features_and_targets()),
                BATCH_SIZE))
        for step in range(int(num_epochs * train_size) // BATCH_SIZE):
            batch_data, batch_labels = next(it)
            # TODO: what if datatype() == tf.float16 ?
            batch_data = batch_data.astype(numpy.float32) / PIXEL_DEPTH - 0.5

            if check_op is not None:
                assert not numpy.isnan(batch_data.sum())
                assert not numpy.isnan(batch_labels.sum())

            # This dictionary maps the batch data (as a numpy array) to the
            # node in the graph it should be fed to.
            feed_dict = {
                train_data_node: batch_data,
                train_labels_node: batch_labels
            }

            # Run the optimizer to update weights.
            if check_op is not None:
                sess.run([optimizer, check_op], feed_dict=feed_dict)
            else:
                _, summary = sess.run([optimizer, merged], feed_dict=feed_dict)
                train_writer.add_summary(summary, step)

            # Save a checkpoint, which by default also exports a meta_graph.
            if step % 100 == 0:
                saver.save(
                    sess,
                    '/home/gerey/hms_lung/models_coefs/2-proper-samples/',
                    global_step=step)
                print("Saved step %d" % (step, ))

            # Print some extra information once we reach the evaluation frequency.
            if step % EVAL_FREQUENCY == 0:
                # Fetch some extra nodes' data.
                if check_op is not None:
                    l, lr, predictions, _ = sess.run(
                        [loss, learning_rate, train_prediction, check_op],
                        feed_dict=feed_dict)
                else:
                    l, lr, predictions = sess.run(
                        [loss, learning_rate, train_prediction],
                        feed_dict=feed_dict)
                elapsed_time = time.time() - start_time
                start_time = time.time()
                print('Step %d (epoch %.2f), %.1f ms' %
                      (step, float(step) * BATCH_SIZE / train_size,
                       1000 * elapsed_time / EVAL_FREQUENCY))
                print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
                print('Minibatch error: %.1f%%' %
                      error_rate(predictions, batch_labels))
                # print('Validation error: %.1f%%' % error_rate(
                #     eval_in_batches(validation_data, sess), validation_labels))
                sys.stdout.flush()
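# `batcher` is used above (and again in the FCN trainer further down) but its
# definition lives elsewhere in the repo. The helper below is only a minimal
# sketch of the assumed behaviour, grouping an iterable of (feature, target)
# pairs into fixed-size numpy batches; it is not the project's actual code.
import itertools

import numpy as np


def batcher_sketch(pairs, batch_size):
    """Yield (features, targets) arrays built from batch_size consecutive pairs."""
    it = iter(pairs)
    while True:
        chunk = list(itertools.islice(it, batch_size))
        if len(chunk) < batch_size:
            return  # drop a trailing incomplete batch
        features, targets = zip(*chunk)
        yield np.stack(features), np.stack(targets)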
    kept = []
    for cnt in contours:
        for kp in keypoints:
            if cv2.pointPolygonTest(cnt, kp.pt, False) >= 0:
                kept.append(cnt)
                break
    return [cv2tolist(cnt) for cnt in kept]
    # Now get convex hull (more likely); currently unreachable alternative:
    return [cv2tolist(cv2.convexHull(cnt, clockwise=False)) for cnt in kept]


if __name__ == '__main__':
    #dataset = Dataset("/home/gerey/hms_lung/data/provisional_extracted_no_gt", withGT=False)
    dataset = Dataset("/home/gerey/hms_lung/data/example_extracted_valid", withGT=False)
    #dataset = Dataset("/home/gerey/hms_lung/data/extract", withGT=False)

    scan_index = 1
    solutions = []
    for scan in dataset.scans():
        for id, img, aux in scan.images_aux():
            scan_id = scan.id()
            # Only process middle slices.
            # TODO: expand once false negatives are cleared
            if scan.nb_slices() / 4 < id < scan.nb_slices() * 1.7 / 3:
                print(scan_id, id, len(slices))
                contours = contouring_binsym(img)
                for contour in contours:
                    coords = flatten(
                        pixelToMM(aux[id], x, y) for (x, y) in contour)
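# `contours` and `keypoints` come from earlier in the enclosing function, which
# is not shown in this fragment. The sketch below only illustrates one plausible
# way such keypoints could be produced with OpenCV's blob detector; the name,
# parameters, and thresholds are assumptions, not the project's actual code.
import cv2


def detect_blob_keypoints_sketch(gray_img):
    """Return blob keypoints whose .pt can be tested against contours."""
    params = cv2.SimpleBlobDetector_Params()
    params.filterByArea = True
    params.minArea = 20
    detector = cv2.SimpleBlobDetector_create(params)
    return detector.detect(gray_img)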
    pool = multiprocessing.Pool(10)
    return flatten(pool.imap(compute_features, iter))


def main(dataset, output):
    # it = iter(generate_features(dataset))
    # with open(output, "w") as f:
    #     for _ in range(2):
    #         print(" ".join(map(str, next(it))), file=f)
    with open(output, "w") as f:
        for features in generate_features(dataset):
            print(" ".join(map(str, features)), file=f)


if __name__ == '__main__':
    output = "/home/gerey/hms_lung/data/example_extracted/features6.ssv"
    train = Dataset("/home/gerey/hms_lung/data/example_extracted")
    train_iter = train.images_and_targets()
    if 1:
        valid = Dataset("/home/gerey/hms_lung/data/example_extracted_valid")
        valid_iter = valid.images_and_targets()
        full_iter = itertools.chain(train_iter, valid_iter)
    else:
        full_iter = train_iter
    main(full_iter, output)
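# The .ssv written above is plain space-separated rows of numbers. A file like
# that can be fed back into scikit-learn to train classifiers such as the
# precision/recall random forests loaded in the prediction script earlier. The
# snippet below is only an illustrative sketch; the column layout and the label
# position are assumptions, not the project's actual format or training code.
import numpy as np
from sklearn.ensemble import RandomForestClassifier


def train_forest_sketch(ssv_path):
    """Train a random forest from a space-separated feature file (assumed layout)."""
    data = np.loadtxt(ssv_path)        # one feature row per contour
    X, y = data[:, :-1], data[:, -1]   # assume the label is the last column
    clf = RandomForestClassifier(n_estimators=100)
    clf.fit(X, y)
    return clf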
def main(argv=None):
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3],
                           name="input_image")
    annotation = tf.placeholder(tf.uint8,
                                shape=[None, IMAGE_SIZE, IMAGE_SIZE],
                                name="annotation")

    pred_annotation, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    #tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation",
                     tf.cast(pred_annotation, tf.uint8),
                     max_outputs=2)
    loss = tf.reduce_mean((
        tf.nn.weighted_cross_entropy_with_logits(
            logits=logits,
            # Class 1 = tumor
            targets=tf.one_hot(annotation, NUM_OF_CLASSESS),
            name="entropy",
            pos_weight=10)))
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    print("Setting up dataset reader")
    # image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    image_options = {'resize': False, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset = Dataset("/home/gerey/hms_lung/data/example_extracted")
    validation_dataset = Dataset(
        "/home/gerey/hms_lung/data/example_extracted_valid")

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.initialize_all_variables())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    valid_it = itertools.cycle(
        batcher(((f, t)
                 for _, _, f, t in validation_dataset.features_and_targets(
                     resize=False, filterBlank=True)), FLAGS.batch_size))

    if FLAGS.mode == "train":
        train_it = itertools.cycle(
            batcher(((f, t)
                     for _, _, f, t in train_dataset.features_and_targets(
                         resize=False, filterBlank=True)), FLAGS.batch_size))
        for itr in range(MAX_ITERATION):
            train_images, train_annotations = next(train_it)
            #train_annotations = np.expand_dims(train_annotations, axis=3)
            feed_dict = {
                image: train_images,
                annotation: train_annotations,
                keep_probability: 0.85
            }

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, summary_op],
                                                   feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                summary_writer.add_summary(summary_str, itr)

            if itr % 500 == 0:
                valid_images, valid_annotations = next(valid_it)
                # valid_annotations = np.expand_dims(valid_annotations, axis=3)
                valid_loss = sess.run(loss,
                                      feed_dict={
                                          image: valid_images,
                                          annotation: valid_annotations,
                                          keep_probability: 1.0
                                      })
                print("%s ---> Validation_loss: %g" %
                      (datetime.datetime.now(), valid_loss))
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

    elif FLAGS.mode == "visualize":
        vizu_it = batcher(
            ((f, t)
             for _, _, f, t in validation_dataset.features_and_targets(
                 resize=False, filterBlank=True)), 1)
        #for itr in range(FLAGS.batch_size):
        for itr in range(100):
            valid_images, valid_annotations = next(vizu_it)  # FIXME: get random_batch ?
            # Leftover from the original FCN code, kept for reference:
            # valid_images, valid_annotations = validation_dataset_reader.get_random_batch(FLAGS.batch_size)
            #valid_annotations = np.expand_dims(valid_annotations, axis=3)
            pred = sess.run(pred_annotation,
                            feed_dict={
                                image: valid_images,
                                annotation: valid_annotations,
                                keep_probability: 1.0
                            })
            #valid_annotations = np.squeeze(valid_annotations, axis=3)
            pred = np.squeeze(pred, axis=3)

            #utils.save_image(valid_images[0].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr))
            #utils.save_image(valid_annotations[0].astype(np.uint8)*255, FLAGS.logs_dir, name="gt_" + str(5+itr))
            utils.save_image(pred[0].astype(np.uint8) * 255,
                             FLAGS.logs_dir,
                             name="pred_" + str(5 + itr))
            print("Saved image: %d" % itr)
def gen_params():
    for scan in dataset.scans():
        scan3D = scan.scan3D()
        if USE_LUNG:
            lung = segment_lung_mask(scan3D, fill_lung_structures=True)
            if MAX_MASK:
                lung = np.any(lung, axis=0)
        else:
            lung = None
        for scan_id, slice_idx, aux in scan.gen_aux():
            yield scan_id, slice_idx, scan.nb_slices(), aux, scan3D, lung


if __name__ == '__main__':
    # dataset = Dataset("/home/gerey/hms_lung/data/provisional_extracted_no_gt", withGT=False)
    dataset = Dataset(
        "/home/gerey/hms_lung/data/example_extracted_valid_small2",
        withGT=False)

    pool = multiprocessing.Pool(30)
    solutions = flatten(pool.imap(compute, gen_params()))
    # solutions = flatten(map(compute, gen_params()))

    with open("/home/gerey/hms_lung/exemple_predictions_sample2_33.csv", "w") as f:
    # with open("/home/gerey/hms_lung/predictions31.csv", "w") as f:
        for s in solutions:
            f.write(",".join(map(str, s)))
            f.write("\n")