Example No. 1
def image_batcher(start, num_batches, images, config, training_max,
                  training_min):
    """Yield batches of center-cropped, renormalized image patches."""
    for b in range(num_batches):
        next_image_batch = images[start:start + config.validation_batch]
        image_stack = []
        for f in next_image_batch:
            # 1. Load image patch
            patch = produce_patch(f,
                                  config.channel,
                                  config.panel,
                                  divide_panel=config.divide_panel,
                                  max_value=config.max_gedi,
                                  min_value=config.min_gedi).astype(np.float32)
            # 2. Repeat to 3 channel (RGB) image
            patch = np.repeat(patch[:, :, None], 3, axis=-1)
            # 3. Renormalize based on the training set intensities
            patch = renormalize(patch,
                                max_value=training_max,
                                min_value=training_min)
            # 4. Crop the center
            patch = crop_center(patch, config.model_image_size[:2])
            # 5. Clip to [0, 1] just in case
            patch[patch > 1.] = 1.
            patch[patch < 0.] = 0.
            # 6. Add to list
            image_stack += [patch[None, :, :, :]]
        # Add dimensions and concatenate
        start += config.validation_batch
        yield np.concatenate(image_stack, axis=0), next_image_batch
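Usage note: a minimal sketch of consuming this generator in a session loop. The names sess, preds, val_images, val_files, training_max, and training_min are assumptions here; they are not defined in the example above.

# Minimal usage sketch (assumed names: sess, preds, val_images placeholder,
# val_files list, training_max/training_min scalars).
import numpy as np

num_batches = int(np.ceil(float(len(val_files)) / config.validation_batch))
all_preds, all_files = [], []
for batch_images, batch_files in image_batcher(
        start=0,
        num_batches=num_batches,
        images=val_files,
        config=config,
        training_max=training_max,
        training_min=training_min):
    # Feed each preprocessed batch through the graph
    all_preds += [sess.run(preds, feed_dict={val_images: batch_images})]
    all_files += [batch_files]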
def extract_to_tf_records(
        files,
        label_list,
        ratio_list,
        output_pointer,
        config,
        k,
        rescale=True):
    """Extract images as TF record files."""
    print('Building %s: %s' % (k, config.tfrecord_dir))
    max_array = []
    min_array = []
    nan_images = []
    count = 0
    with tf.python_io.TFRecordWriter(output_pointer) as tfrecord_writer:
        for idx, (f, l) in tqdm(
            enumerate(
                zip(files, label_list)), total=len(files)):
            r = get_image_ratio(
                f,
                ratio_list,
                timepoints=config.channel,
                id_column=config.id_column,
                regex_match=config.ratio_regex)
            num_timepoints = derive_timepoints(f)
            images = produce_patch(
                f,
                config.channel,
                config.panel,
                divide_panel=config.divide_panel,
                max_value=config.max_gedi,
                min_value=config.min_gedi,
                return_raw=True).astype(np.float32)

            # Produce {t, t+1} image set for every pair in images
            for t in range(1, num_timepoints):
                it_im = images[t - 1]
                next_im = images[t]
                if rescale:
                    # Rescale each patch by its own intensity range
                    it_im = rescale_patch(
                        it_im,
                        min_value=it_im.min(),
                        max_value=it_im.max())
                    next_im = rescale_patch(
                        next_im,
                        min_value=next_im.min(),
                        max_value=next_im.max())
                max_array += [np.max(it_im)]
                min_array += [np.min(it_im)]

                if np.isnan(it_im).sum():
                    nan_images += [1]

                # construct the Example proto object
                feature_dict = features_to_dict(
                    label=l,
                    image=it_im,
                    filename=f,
                    ratio=r,
                    gedi_image=None,  # Not implemented
                    extra_image=next_im)
                count += 1
                example = tf.train.Example(
                    # Example contains a Features proto object
                    features=tf.train.Features(
                        # Features has a map of string to Feature proto objects
                        feature=feature_dict
                    )
                )
                # use the proto object to serialize the example to a string
                serialized = example.SerializeToString()
                # write the serialized object to disk
                tfrecord_writer.write(serialized)

    # Calculate ratio of +:-
    lab_counts = np.asarray(
        [np.sum(label_list == 0), np.sum(label_list == 1)]).astype(float)
    ratio = lab_counts / float(len(label_list))
    print('Data ratio is %s' % ratio)
    np.savez(
        os.path.join(
            config.tfrecord_dir, k + '_' + config.max_file),
        max_array=max_array,
        min_array=min_array,
        ratio=ratio,
        filenames=files,
        nan_images=nan_images)
    return max_array, min_array
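The max/min arrays saved above can later be used to set training_max and training_min for image_batcher. A minimal sketch, assuming the records were written with k='val' and that config.max_file is 'maximum_value.npz' (matching the val_maximum_value.npz file loaded in Example No. 4 below):

# Minimal sketch: recover training intensity bounds from the saved .npz.
# The path components are assumptions based on the np.savez call above.
import os
import numpy as np

meta = np.load(os.path.join(config.tfrecord_dir, 'val_' + config.max_file))
training_max = np.max(meta['max_array']).astype(np.float32)
training_min = np.min(meta['min_array']).astype(np.float32)
print('Training intensity range: [%s, %s]' % (training_min, training_max))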
Example No. 3
def image_batcher(
        start,
        num_batches,
        images,
        labels,
        config,
        training_max,
        training_min,
        num_channels=3,
        per_timepoint=False):
    """Placeholder image/label batch loader."""
    for b in range(num_batches):
        next_image_batch = images[start:start + config.validation_batch]
        image_stack, output_files = [], []
        label_stack = labels[start:start + config.validation_batch]
        for f in next_image_batch:
            if per_timepoint:
                for channel in range(num_channels):
                    # 1. Load image patch
                    patch = produce_patch(
                        f,
                        channel,
                        config.panel,
                        divide_panel=config.divide_panel,
                        max_value=config.max_gedi,
                        min_value=config.min_gedi).astype(np.float32)
                    # 2. Repeat to 3 channel (RGB) image
                    patch = np.repeat(patch[:, :, None], 3, axis=-1)
                    # 3. Renormalize based on the training set intensities
                    patch = renormalize(
                        patch,
                        max_value=training_max,
                        min_value=training_min)
                    # 4. Crop the center
                    patch = crop_center(patch, config.model_image_size[:2])
                    # 5. Clip to [0, 1] just in case
                    patch[patch > 1.] = 1.
                    patch[patch < 0.] = 0.
                    # 6. Add to list
                    image_stack += [patch[None, :, :, :]]
                    # Tag each entry with its source file and timepoint
                    output_files += ['%s_%s' % (f, channel)]
            else:
                # 1. Load image patch
                patch = produce_patch(
                    f,
                    config.channel,
                    config.panel,
                    divide_panel=config.divide_panel,
                    max_value=config.max_gedi,
                    min_value=config.min_gedi).astype(np.float32)
                # 2. Repeat to 3 channel (RGB) image
                patch = np.repeat(patch[:, :, None], 3, axis=-1)
                # 3. Renormalize based on the training set intensities
                patch = renormalize(
                    patch,
                    max_value=training_max,
                    min_value=training_min)
                # 4. Crop the center
                patch = crop_center(patch, config.model_image_size[:2])
                # 5. Clip to [0, 1] just in case
                patch[patch > 1.] = 1.
                patch[patch < 0.] = 0.
                # 6. Add to list
                image_stack += [patch[None, :, :, :]]
                output_files = np.copy(next_image_batch)
        # Add dimensions and concatenate
        start += config.validation_batch
        yield np.concatenate(
            image_stack, axis=0), label_stack, output_files
Example No. 4
def test_vgg16(validation_data,
               model_dir,
               label_file,
               selected_ckpts=-1,
               force=False):
    config = GEDIconfig()

    # Load metas (tf_dir was undefined; assume the validation records dir)
    meta_data = np.load(
        os.path.join(config.tfrecord_dir, 'val_maximum_value.npz'))
    max_value = np.max(meta_data['max_array']).astype(np.float32)

    # Find model checkpoints
    ckpts, ckpt_names = find_ckpts(config, model_dir)
    # ds_dt_stamp = re.split('/', ckpts[0])[-2]
    out_dir = os.path.join(config.results, 'gfp_2017_02_19_17_41_19' + '/')
    try:
        config = np.load(os.path.join(out_dir, 'meta_info.npy')).item()
        # Override the stored validation batch size
        config.validation_batch = 64
        print('-' * 60)
        print('Loading config meta data for: %s' % out_dir)
        print('-' * 60)
    except (IOError, OSError):
        print('-' * 60)
        print('Using config from gedi_config.py for model: %s' % out_dir)
        print('-' * 60)

    sorted_index = np.argsort(np.asarray([int(x) for x in ckpt_names]))
    ckpts = ckpts[sorted_index]
    ckpt_names = ckpt_names[sorted_index]

    # CSV file
    svm_image_file = os.path.join(out_dir, 'svm_models.npz')
    if os.path.exists(svm_image_file) and not force:
        svm_image_data = np.load(svm_image_file)
        image_array = svm_image_data['image_array']
        label_vec = svm_image_data['label_vec']
        tr_label_vec = svm_image_data['tr_label_vec']
        np_label_vec = svm_image_data['np_label_vec']
        missing_ims = svm_image_data['missing_ims']
    else:
        labels = pd.read_csv(
            os.path.join(config.processed_image_patch_dir, label_file))
        image_array = []
        label_vec = []
        missing_ims = []
        # Looking up images from the csv doesn't work, so find the label for each image instead
        for _, row in labels.iterrows():
            try:
                im = produce_patch(os.path.join(image_dir, row['lf']),
                                   config.channel,
                                   config.panel,
                                   divide_panel=config.divide_panel,
                                   max_value=None).astype(np.float32)
                im = np.repeat(misc.imresize(im / max_value,
                                             config.model_image_size)[:, :,
                                                                      None],
                               3,
                               axis=-1)
                image_array.append(im)
                label_vec.append(row['Sci_SampleID'])
            except Exception:
                print('Cannot find %s' % row['lf'])
                missing_ims.append(row['lf'])
        np_label_vec = np.asarray(label_vec)
        le = preprocessing.LabelEncoder()
        tr_label_vec = le.fit_transform(np_label_vec)
        np.savez(svm_image_file,
                 image_array=image_array,
                 label_vec=label_vec,
                 tr_label_vec=tr_label_vec,
                 np_label_vec=np_label_vec,
                 missing_ims=missing_ims)

    # Make output directories if they do not exist
    dir_list = [config.results, out_dir]
    [make_dir(d) for d in dir_list]

    # Make placeholder
    val_images = tf.placeholder(tf.float32,
                                shape=[None] + config.model_image_size)

    # Prepare model on GPU
    with tf.device('/gpu:0'):
        with tf.variable_scope('cnn'):
            vgg = vgg16.Vgg16(vgg16_npy_path=config.vgg16_weight_path,
                              fine_tune_layers=config.fine_tune_layers)
            validation_mode = tf.Variable(False, name='training')
            # No batchnorms during testing
            vgg.build(val_images,
                      output_shape=config.output_shape,
                      train_mode=validation_mode)

    # Set up saver
    svm_feature_file = os.path.join(out_dir, 'svm_scores.npz')
    if os.path.exists(svm_feature_file) and not force:
        svm_features = np.load(svm_feature_file)
        dec_scores = svm_features['dec_scores']
        label_vec = svm_features['label_vec']
    else:
        saver = tf.train.Saver(tf.global_variables())
        ckpts = [ckpts[selected_ckpts]]
        image_array = np.asarray(image_array)
        for idx, c in enumerate(ckpts):
            dec_scores = []
            # Initialize the graph
            sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
            sess.run(
                tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer()))

            # Set up exemplar threading
            saver.restore(sess, c)
            num_batches = np.ceil(
                float(len(image_array)) / config.validation_batch).astype(int)
            batch_idx = np.arange(
                num_batches).repeat(config.validation_batch)[:len(image_array)]
            for bi in np.unique(batch_idx):
                # move this above to image processing
                batch_images = image_array[batch_idx == bi] / 255.
                start_time = time.time()
                sc = sess.run(vgg.fc7, feed_dict={val_images: batch_images})
                dec_scores.append(sc)
                print('Batch %d took %.1f seconds' % (
                    bi, time.time() - start_time))

    # Save everything
    np.savez(svm_feature_file, dec_scores=dec_scores, label_vec=label_vec)

    # Build SVM
    dec_scores = np.concatenate(dec_scores[:], axis=0)
    model_array, score_array, combo_array, masked_label_array = [], [], [], []
    unique_labels = np.unique(np_label_vec)
    for combo in tqdm(
            itertools.combinations(unique_labels, 2),
            total=len(unique_labels) * (len(unique_labels) - 1) // 2):
        combo_array.append(combo)
        mask = np.logical_or(np_label_vec == combo[0],
                             np_label_vec == combo[1])
        masked_labels = np_label_vec[mask]
        masked_scores = dec_scores[mask, :]
        clf = SGDClassifier(loss='hinge')
        scores = cross_val_score(clf, masked_scores, masked_labels, cv=5)
        model_array.append(clf)
        score_array.append(scores)
        masked_label_array.append(masked_labels)
    all_scores = np.concatenate(score_array)
    print("Accuracy: %0.2f (+/- %0.2f)" % (all_scores.mean(), all_scores.std() * 2))

    # Save everything
    np.savez(os.path.join(out_dir, 'svm_models'),
             combo_array=combo_array,
             model_array=model_array,
             score_array=score_array,
             masked_label_array=masked_label_array)
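A per-pair summary of the cross-validation scores collected above can be printed after the loop; a minimal sketch using the combo_array and score_array lists built in this example:

# Minimal sketch: mean CV accuracy for each label pair.
for combo, scores in zip(combo_array, score_array):
    print('%s vs %s: %0.2f (+/- %0.2f)' % (
        combo[0], combo[1], scores.mean(), scores.std() * 2))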
Example No. 5
def extract_to_tf_records(
        files,
        label_list,
        ratio_list,
        output_pointer,
        config,
        k):
    """Extract images as TF record files."""
    print('Building %s: %s' % (k, config.tfrecord_dir))
    max_array = np.zeros(len(files))
    min_array = np.zeros(len(files))
    nan_images = np.zeros(len(files))
    with tf.python_io.TFRecordWriter(output_pointer) as tfrecord_writer:
        for idx, (f, l) in tqdm(
            enumerate(
                zip(files, label_list)), total=len(files)):
            r = get_image_ratio(
                f,
                ratio_list,
                timepoints=config.channel,
                id_column=config.id_column,
                regex_match=config.ratio_regex)
            if isinstance(config.channel, list):
                image = []
                for c in config.channel:
                    image += [produce_patch(
                        f,
                        c,
                        config.panel,
                        divide_panel=config.divide_panel,
                        max_value=config.max_gedi,
                        min_value=config.min_gedi,
                        matching=config.matching).astype(
                            np.float32)[None, :, :]]
                image = np.concatenate(image)
                l = (r > config.ratio_cutoff).astype(int)
            else:
                image = produce_patch(
                    f,
                    config.channel,
                    config.panel,
                    divide_panel=config.divide_panel,
                    max_value=config.max_gedi,
                    min_value=config.min_gedi,
                    matching=config.matching).astype(np.float32)
            if np.isnan(image).sum() != 0:
                nan_images[idx] = 1
            if not config.include_GEDI_in_tfrecords:
                gedi_image = None
            else:
                if config.include_GEDI_in_tfrecords > 0:
                    gedi_image = produce_patch(
                        f,
                        config.channel,
                        2,
                        divide_panel=config.divide_panel,
                        max_value=config.max_gedi,
                        min_value=config.min_gedi).astype(np.float32)
                else:
                    # Use a distinct loop variable so the outer idx is not
                    # clobbered (list comprehensions leak their variable in Python 2)
                    gedi_image = [produce_patch(
                        f,
                        config.channel + offset,
                        2,
                        divide_panel=config.divide_panel,
                        max_value=config.max_gedi,
                        min_value=config.min_gedi).astype(
                            np.float32) for offset in range(
                            config.include_GEDI_in_tfrecords)]
            if config.extra_image:
                extra_image = produce_patch(
                    f,
                    config.channel + 1,  # Hardcoded for now.
                    config.panel,
                    divide_panel=config.divide_panel,
                    max_value=config.max_gedi,
                    min_value=config.min_gedi).astype(np.float32)
            else:
                extra_image = None
            max_array[idx] = np.max(image)
            min_array[idx] = np.min(image)
            # construct the Example proto object
            feature_dict = features_to_dict(
                label=l,
                image=image,
                filename=f,
                ratio=r,
                gedi_image=gedi_image,
                extra_image=extra_image)
            example = tf.train.Example(
                # Example contains a Features proto object
                features=tf.train.Features(
                    # Features has a map of string to Feature proto objects
                    feature=feature_dict
                )
            )
            # use the proto object to serialize the example to a string
            serialized = example.SerializeToString()
            # write the serialized object to disk
            tfrecord_writer.write(serialized)

    # Calculate ratio of +:-
    lab_counts = np.asarray(
        [np.sum(label_list == 0), np.sum(label_list == 1)]).astype(float)
    print('label list preprocessing_tfrecords', label_list)
    if ratio_list is not None:
        ratio = lab_counts / float(len(label_list))
        print('Data ratio is %s' % ratio)
    else:
        ratio = None
    np.savez(
        os.path.join(
            config.tfrecord_dir, k + '_' + config.max_file),
        max_array=max_array,
        min_array=min_array,
        ratio=ratio,
        filenames=files)
    return max_array, min_array
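Reading these records back requires a parser whose feature keys match whatever features_to_dict serializes; those keys are not shown here, so the names and dtypes below are assumptions. A minimal TF 1.x sketch:

# Minimal read-back sketch (TF 1.x). The 'image' and 'label' keys and their
# dtypes are assumptions; adjust them to the output of features_to_dict.
import tensorflow as tf

def parse_example(serialized, height, width):
    features = tf.parse_single_example(
        serialized,
        features={
            'image': tf.FixedLenFeature([], tf.string),  # assumed key
            'label': tf.FixedLenFeature([], tf.int64),   # assumed key
        })
    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, [height, width])
    return image, features['label']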
Example No. 6
def extract_to_tf_records(files, label_list, ratio_list, output_pointer,
                          config, k):
    """Extract images as TF record files."""
    print('Building %s: %s' % (k, config.tfrecord_dir))
    max_array = []
    min_array = []
    count = 0
    with tf.python_io.TFRecordWriter(output_pointer) as tfrecord_writer:
        for idx, (f, l) in tqdm(enumerate(zip(files, label_list)),
                                total=len(files)):
            if isinstance(config.channel, list):
                image = []
                for c in config.channel:
                    image += [
                        produce_patch(f,
                                      c,
                                      config.panel,
                                      divide_panel=config.divide_panel,
                                      max_value=config.max_gedi,
                                      min_value=config.min_gedi,
                                      matching=config.matching).astype(
                                          np.float32)[None, :, :]
                    ]
                image = np.concatenate(image)
            else:
                image = produce_patch(f,
                                      config.channel,
                                      config.panel,
                                      divide_panel=config.divide_panel,
                                      max_value=config.max_gedi,
                                      min_value=config.min_gedi,
                                      matching=config.matching).astype(
                                          np.float32)

            if image.shape[-1] == config.gedi_image_size[1] * 3:

                # Extract timepoint information
                # 0 = dataset
                # 1 = First panel timepoint
                # 2 = Well
                # 3 = Neuron number
                # 4 = Second panel timepoint
                # 5 - 17 = Third panel information
                split_tokens = f.split(os.path.sep)[-1].split('_')

                # Create two tfrecord entries - p1vp2 and p1vp3
                p1 = np.expand_dims(image[:, :config.gedi_image_size[1]],
                                    axis=-1)
                f1 = ''.join(i for i in split_tokens[1] if i.isdigit())
                f1 = float(f1)
                p2 = np.expand_dims(
                    image[:,
                          config.gedi_image_size[1]:config.gedi_image_size[1] *
                          2],
                    axis=-1)
                f2 = ''.join(i for i in split_tokens[4] if i.isdigit())
                f2 = float(f2)
                p3 = np.expand_dims(image[:, config.gedi_image_size[1] *
                                          2:config.gedi_image_size[1] * 3],
                                    axis=-1)
                f1vf2d = f2 - f1
                f1vf3d = -1.

                # Create images
                p1vp3 = np.concatenate([p1, p3], axis=-1)
                p1vp2 = np.concatenate([p1, p2], axis=-1)

                # SAME
                max_array += [np.max(p1), np.max(p2)]
                min_array += [np.min(p1), np.min(p2)]
                # construct the Example proto object
                feature_dict = features_to_dict(
                    label=1,  # Same cells
                    image=p1vp2,
                    filename=f,
                    ratio=f1vf2d)
                example = tf.train.Example(
                    # Example contains a Features proto object
                    features=tf.train.Features(
                        # Features has a map of string to Feature proto objects
                        feature=feature_dict))
                count += 1
                # use the proto object to serialize the example to a string
                serialized = example.SerializeToString()
                # write the serialized object to disk
                tfrecord_writer.write(serialized)

                # DIFFERENT
                max_array += [np.max(p1), np.max(p3)]
                min_array += [np.min(p1), np.min(p3)]
                # construct the Example proto object
                feature_dict = features_to_dict(
                    label=0,  # Different cells
                    image=p1vp3,
                    filename=f,
                    ratio=f1vf3d)
                example = tf.train.Example(
                    # Example contains a Features proto object
                    features=tf.train.Features(
                        # Features has a map of string to Feature proto objects
                        feature=feature_dict))
                count += 1
                # use the proto object to serialize the example to a string
                serialized = example.SerializeToString()
                # write the serialized object to disk
                tfrecord_writer.write(serialized)
            else:
                print('Skipped image %s' % f)
    # Same/different pairs are balanced by construction
    ratio = [.5, .5]
    print('Data ratio is %s' % ratio)
    np.savez(os.path.join(config.tfrecord_dir, k + '_' + config.max_file),
             max_array=max_array,
             min_array=min_array,
             ratio=ratio,
             filenames=files)
    return max_array, min_array