def image_batcher(
        start, num_batches, images, config, training_max, training_min):
    """Yield preprocessed image batches and their source filenames."""
    for b in range(num_batches):
        next_image_batch = images[start:start + config.validation_batch]
        image_stack = []
        for f in next_image_batch:
            # 1. Load image patch
            patch = produce_patch(
                f,
                config.channel,
                config.panel,
                divide_panel=config.divide_panel,
                max_value=config.max_gedi,
                min_value=config.min_gedi).astype(np.float32)
            # 2. Repeat to a 3-channel (RGB) image
            patch = np.repeat(patch[:, :, None], 3, axis=-1)
            # 3. Renormalize based on the training-set intensities
            patch = renormalize(
                patch,
                max_value=training_max,
                min_value=training_min)
            # 4. Crop the center
            patch = crop_center(patch, config.model_image_size[:2])
            # 5. Clip to [0, 1] just in case
            patch[patch > 1.] = 1.
            patch[patch < 0.] = 0.
            # 6. Add to list
            image_stack += [patch[None, :, :, :]]
        # Add a leading dimension and concatenate into a batch
        start += config.validation_batch
        yield np.concatenate(image_stack, axis=0), next_image_batch
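
# Illustrative usage sketch (not from the original source): one way the
# generator above might be consumed in a TF1 feed_dict loop. The `sess`,
# `image_placeholder` and `model_output` arguments are assumptions.
def _example_image_batcher_usage(
        sess, image_placeholder, model_output, files, config,
        training_max, training_min):
    num_batches = int(np.ceil(float(len(files)) / config.validation_batch))
    all_scores, all_files = [], []
    for image_batch, file_batch in image_batcher(
            start=0,
            num_batches=num_batches,
            images=files,
            config=config,
            training_max=training_max,
            training_min=training_min):
        # Score each preprocessed batch with the restored model
        scores = sess.run(
            model_output, feed_dict={image_placeholder: image_batch})
        all_scores += [scores]
        all_files += [file_batch]
    return np.concatenate(all_scores, axis=0), all_files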
def extract_to_tf_records(
        files,
        label_list,
        ratio_list,
        output_pointer,
        config,
        k,
        rescale=True):
    """Extract images as TF record files."""
    print('Building %s: %s' % (k, config.tfrecord_dir))
    max_array = []
    min_array = []
    nan_images = []
    count = 0
    with tf.python_io.TFRecordWriter(output_pointer) as tfrecord_writer:
        for idx, (f, l) in tqdm(
                enumerate(zip(files, label_list)), total=len(files)):
            r = get_image_ratio(
                f,
                ratio_list,
                timepoints=config.channel,
                id_column=config.id_column,
                regex_match=config.ratio_regex)
            num_timepoints = derive_timepoints(f)
            images = produce_patch(
                f,
                config.channel,
                config.panel,
                divide_panel=config.divide_panel,
                max_value=config.max_gedi,
                min_value=config.min_gedi,
                return_raw=True).astype(np.float32)
            # Produce a {t, t+1} image pair for every adjacent timepoint
            for t in range(1, num_timepoints):
                it_im = images[t - 1]
                next_im = images[t]
                if rescale:
                    # Rescale each patch to its own intensity range
                    it_im = rescale_patch(
                        it_im,
                        min_value=it_im.min(),
                        max_value=it_im.max())
                    next_im = rescale_patch(
                        next_im,
                        min_value=next_im.min(),
                        max_value=next_im.max())
                max_array += [np.max(it_im)]
                min_array += [np.min(it_im)]
                if np.isnan(it_im).sum():
                    nan_images += [1]
                # Construct the Example proto object
                feature_dict = features_to_dict(
                    label=l,
                    image=it_im,
                    filename=f,
                    ratio=r,
                    gedi_image=None,  # Not implemented
                    extra_image=next_im)
                count += 1
                example = tf.train.Example(
                    # Example contains a Features proto object
                    features=tf.train.Features(
                        # Features has a map of string to Feature proto objects
                        feature=feature_dict))
                # Use the proto object to serialize the example to a string
                serialized = example.SerializeToString()
                # Write the serialized object to disk
                tfrecord_writer.write(serialized)
    # Calculate the ratio of positive to negative labels
    lab_counts = np.asarray(
        [np.sum(label_list == 0), np.sum(label_list == 1)]).astype(float)
    ratio = lab_counts / float(len(label_list))
    print('Data ratio is %s' % ratio)
    np.savez(
        os.path.join(config.tfrecord_dir, k + '_' + config.max_file),
        max_array=max_array,
        min_array=min_array,
        ratio=ratio,
        filenames=files,
        nan_images=nan_images)
    return max_array, min_array
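
# Illustrative usage sketch (not from the original source): driving the
# timepoint-pair writer above for train/val splits. The split variables and
# the '<split>.tfrecords' naming convention are assumptions.
def _example_write_timepoint_splits(
        config, train_files, train_labels, val_files, val_labels, ratio_list):
    splits = {
        'train': (train_files, train_labels),
        'val': (val_files, val_labels),
    }
    for k, (split_files, split_labels) in splits.items():
        output_pointer = os.path.join(config.tfrecord_dir, '%s.tfrecords' % k)
        max_array, min_array = extract_to_tf_records(
            files=split_files,
            label_list=split_labels,
            ratio_list=ratio_list,
            output_pointer=output_pointer,
            config=config,
            k=k,
            rescale=True)
        print('%s: wrote records to %s (peak intensity %s)' % (
            k, output_pointer, np.max(max_array)))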
def image_batcher(
        start,
        num_batches,
        images,
        labels,
        config,
        training_max,
        training_min,
        num_channels=3,
        per_timepoint=False):
    """Yield preprocessed image/label batches for feed_dict placeholders."""
    for b in range(num_batches):
        next_image_batch = images[start:start + config.validation_batch]
        image_stack, output_files = [], []
        label_stack = labels[start:start + config.validation_batch]
        for f in next_image_batch:
            if per_timepoint:
                for channel in range(num_channels):
                    # 1. Load image patch
                    patch = produce_patch(
                        f,
                        channel,
                        config.panel,
                        divide_panel=config.divide_panel,
                        max_value=config.max_gedi,
                        min_value=config.min_gedi).astype(np.float32)
                    # 2. Repeat to a 3-channel (RGB) image
                    patch = np.repeat(patch[:, :, None], 3, axis=-1)
                    # 3. Renormalize based on the training-set intensities
                    patch = renormalize(
                        patch,
                        max_value=training_max,
                        min_value=training_min)
                    # 4. Crop the center
                    patch = crop_center(patch, config.model_image_size[:2])
                    # 5. Clip to [0, 1] just in case
                    patch[patch > 1.] = 1.
                    patch[patch < 0.] = 0.
                    # 6. Add to list
                    image_stack += [patch[None, :, :, :]]
                    output_files += ['f_%s' % channel]
            else:
                # 1. Load image patch
                patch = produce_patch(
                    f,
                    config.channel,
                    config.panel,
                    divide_panel=config.divide_panel,
                    max_value=config.max_gedi,
                    min_value=config.min_gedi).astype(np.float32)
                # 2. Repeat to a 3-channel (RGB) image
                patch = np.repeat(patch[:, :, None], 3, axis=-1)
                # 3. Renormalize based on the training-set intensities
                patch = renormalize(
                    patch,
                    max_value=training_max,
                    min_value=training_min)
                # 4. Crop the center
                patch = crop_center(patch, config.model_image_size[:2])
                # 5. Clip to [0, 1] just in case
                patch[patch > 1.] = 1.
                patch[patch < 0.] = 0.
                # 6. Add to list
                image_stack += [patch[None, :, :, :]]
                output_files = np.copy(next_image_batch)
        # Add a leading dimension and concatenate into a batch
        start += config.validation_batch
        yield np.concatenate(
            image_stack, axis=0), label_stack, output_files
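
# Illustrative usage sketch (not from the original source): consuming the
# labeled batcher above with per_timepoint=True, so every file is expanded
# into one entry per channel/timepoint. `sess`, `image_placeholder` and
# `logits` are assumptions.
def _example_per_timepoint_batcher_usage(
        sess, image_placeholder, logits, files, labels, config,
        training_max, training_min, num_channels=3):
    num_batches = int(np.ceil(float(len(files)) / config.validation_batch))
    preds, gt_labels, out_files = [], [], []
    for image_batch, label_batch, file_batch in image_batcher(
            start=0,
            num_batches=num_batches,
            images=files,
            labels=labels,
            config=config,
            training_max=training_max,
            training_min=training_min,
            num_channels=num_channels,
            per_timepoint=True):
        preds += [sess.run(logits, feed_dict={image_placeholder: image_batch})]
        gt_labels += [label_batch]
        out_files += [file_batch]
    return np.concatenate(preds, axis=0), gt_labels, out_files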
def test_vgg16(
        validation_data,
        model_dir,
        label_file,
        selected_ckpts=-1,
        force=False):
    config = GEDIconfig()
    # Load metas
    meta_data = np.load(os.path.join(tf_dir, 'val_maximum_value.npz'))
    max_value = np.max(meta_data['max_array']).astype(np.float32)

    # Find model checkpoints
    ckpts, ckpt_names = find_ckpts(config, model_dir)
    # ds_dt_stamp = re.split('/', ckpts[0])[-2]
    out_dir = os.path.join(config.results, 'gfp_2017_02_19_17_41_19' + '/')
    try:
        config = np.load(os.path.join(out_dir, 'meta_info.npy')).item()
        # Override the stored validation batch size
        config.validation_batch = 64
        print('-' * 60)
        print('Loading config meta data for: %s' % out_dir)
        print('-' * 60)
    except Exception:
        print('-' * 60)
        print('Using config from gedi_config.py for model: %s' % out_dir)
        print('-' * 60)
    sorted_index = np.argsort(np.asarray([int(x) for x in ckpt_names]))
    ckpts = ckpts[sorted_index]
    ckpt_names = ckpt_names[sorted_index]

    # CSV file
    svm_image_file = os.path.join(out_dir, 'svm_models.npz')
    if os.path.exists(svm_image_file) and not force:
        svm_image_data = np.load(svm_image_file)
        image_array = svm_image_data['image_array']
        label_vec = svm_image_data['label_vec']
        tr_label_vec = svm_image_data['tr_label_vec']
        np_label_vec = svm_image_data['np_label_vec']
        missing_ims = svm_image_data['missing_ims']
    else:
        labels = pd.read_csv(
            os.path.join(config.processed_image_patch_dir, label_file))
        image_array = []
        label_vec = []
        missing_ims = []
        # Because looking up images from the csv doesn't work,
        # find the label for each image instead.
        for _, row in labels.iterrows():
            try:
                im = produce_patch(
                    os.path.join(image_dir, row['lf']),
                    config.channel,
                    config.panel,
                    divide_panel=config.divide_panel,
                    max_value=None).astype(np.float32)
                im = np.repeat(
                    misc.imresize(
                        im / max_value,
                        config.model_image_size)[:, :, None],
                    3,
                    axis=-1)
                image_array.append(im)
                label_vec.append(row['Sci_SampleID'])
            except Exception:
                print('Cannot find %s' % row['lf'])
                missing_ims.append(row['lf'])
        np_label_vec = np.asarray(label_vec)
        le = preprocessing.LabelEncoder()
        tr_label_vec = le.fit_transform(np_label_vec)
        np.savez(
            svm_image_file,
            image_array=image_array,
            label_vec=label_vec,
            tr_label_vec=tr_label_vec,
            np_label_vec=np_label_vec,
            missing_ims=missing_ims)

    # Make output directories if they do not exist
    dir_list = [config.results, out_dir]
    [make_dir(d) for d in dir_list]

    # Make placeholder
    val_images = tf.placeholder(
        tf.float32,
        shape=[None] + config.model_image_size)

    # Prepare model on GPU
    with tf.device('/gpu:0'):
        with tf.variable_scope('cnn'):
            vgg = vgg16.Vgg16(
                vgg16_npy_path=config.vgg16_weight_path,
                fine_tune_layers=config.fine_tune_layers)
            validation_mode = tf.Variable(False, name='training')
            # No batchnorms during testing
            vgg.build(
                val_images,
                output_shape=config.output_shape,
                train_mode=validation_mode)

    # Set up saver
    svm_feature_file = os.path.join(out_dir, 'svm_scores.npz')
    if os.path.exists(svm_feature_file) and not force:
        svm_features = np.load(svm_feature_file)
        dec_scores = svm_features['dec_scores']
        label_vec = svm_features['label_vec']
    else:
        saver = tf.train.Saver(tf.global_variables())
        ckpts = [ckpts[selected_ckpts]]
        image_array = np.asarray(image_array)
        for idx, c in enumerate(ckpts):
            dec_scores = []
            # Initialize the graph
            sess = tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True))
            sess.run(
                tf.group(
                    tf.global_variables_initializer(),
                    tf.local_variables_initializer()))
            # Restore the selected checkpoint
            saver.restore(sess, c)
            num_batches = np.ceil(
                len(image_array) / config.validation_batch).astype(int)
            batch_idx = np.arange(num_batches).repeat(
                config.validation_batch + 1)[:len(image_array)]
            for bi in np.unique(batch_idx):
                # TODO: move this normalization into the image preprocessing
                batch_images = image_array[batch_idx == bi] / 255.
                start_time = time.time()
                sc = sess.run(vgg.fc7, feed_dict={val_images: batch_images})
                dec_scores.append(sc)
                print('Batch %d took %.1f seconds' % (
                    bi, time.time() - start_time))

    # Save everything
    np.savez(svm_feature_file, dec_scores=dec_scores, label_vec=label_vec)

    # Build SVM
    dec_scores = np.concatenate(dec_scores, axis=0)
    model_array, score_array, combo_array, masked_label_array = [], [], [], []
    for combo in tqdm(
            itertools.combinations(np.unique(np_label_vec), 2),
            total=len(np.unique(np_label_vec))):
        combo_array.append(combo)
        mask = np.logical_or(
            np_label_vec == combo[0], np_label_vec == combo[1])
        masked_labels = np_label_vec[mask]
        masked_scores = dec_scores[mask, :]
        clf = SGDClassifier(loss='hinge')
        scores = cross_val_score(clf, masked_scores, masked_labels, cv=5)
        model_array.append(clf)
        score_array.append(scores)
        masked_label_array.append(masked_labels)
        print('Accuracy: %0.2f (+/- %0.2f)' % (
            scores.mean(), scores.std() * 2))

    # Save everything
    np.savez(
        os.path.join(out_dir, 'svm_models'),
        combo_array=combo_array,
        model_array=model_array,
        score_array=score_array,
        masked_label_array=masked_label_array)
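
# Illustrative sketch (not from the original source): a minimal CLI wrapper
# around test_vgg16. All flag names and defaults are assumptions.
def _example_main():
    import argparse
    parser = argparse.ArgumentParser(
        description='Extract VGG16 fc7 features and fit pairwise SVMs.')
    parser.add_argument('--validation_data', type=str, default=None)
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--label_file', type=str, default=None)
    parser.add_argument('--selected_ckpts', type=int, default=-1)
    parser.add_argument('--force', action='store_true')
    args = parser.parse_args()
    test_vgg16(
        validation_data=args.validation_data,
        model_dir=args.model_dir,
        label_file=args.label_file,
        selected_ckpts=args.selected_ckpts,
        force=args.force)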
def extract_to_tf_records(
        files,
        label_list,
        ratio_list,
        output_pointer,
        config,
        k):
    """Extract images as TF record files."""
    print('Building %s: %s' % (k, config.tfrecord_dir))
    max_array = np.zeros(len(files))
    min_array = np.zeros(len(files))
    nan_images = np.zeros(len(files))
    with tf.python_io.TFRecordWriter(output_pointer) as tfrecord_writer:
        for idx, (f, l) in tqdm(
                enumerate(zip(files, label_list)), total=len(files)):
            r = get_image_ratio(
                f,
                ratio_list,
                timepoints=config.channel,
                id_column=config.id_column,
                regex_match=config.ratio_regex)
            if isinstance(config.channel, list):
                image = []
                for c in config.channel:
                    image += [produce_patch(
                        f,
                        c,
                        config.panel,
                        divide_panel=config.divide_panel,
                        max_value=config.max_gedi,
                        min_value=config.min_gedi,
                        matching=config.matching).astype(
                            np.float32)[None, :, :]]
                image = np.concatenate(image)
                l = (r > config.ratio_cutoff).astype(int)
            else:
                image = produce_patch(
                    f,
                    config.channel,
                    config.panel,
                    divide_panel=config.divide_panel,
                    max_value=config.max_gedi,
                    min_value=config.min_gedi,
                    matching=config.matching).astype(np.float32)
            if np.isnan(image).sum() != 0:
                nan_images[idx] = 1
            if not config.include_GEDI_in_tfrecords:
                gedi_image = None
            else:
                if config.include_GEDI_in_tfrecords > 0:
                    gedi_image = produce_patch(
                        f,
                        config.channel,
                        2,
                        divide_panel=config.divide_panel,
                        max_value=config.max_gedi,
                        min_value=config.min_gedi).astype(np.float32)
                else:
                    # Use a separate index so the outer idx is not shadowed
                    gedi_image = [produce_patch(
                        f,
                        config.channel + g_idx,
                        2,
                        divide_panel=config.divide_panel,
                        max_value=config.max_gedi,
                        min_value=config.min_gedi).astype(
                            np.float32) for g_idx in range(
                                config.include_GEDI_in_tfrecords)]
            if config.extra_image:
                extra_image = produce_patch(
                    f,
                    config.channel + 1,  # Hardcoded for now.
                    config.panel,
                    divide_panel=config.divide_panel,
                    max_value=config.max_gedi,
                    min_value=config.min_gedi).astype(np.float32)
            else:
                extra_image = None
            max_array[idx] = np.max(image)
            # Construct the Example proto object
            feature_dict = features_to_dict(
                label=l,
                image=image,
                filename=f,
                ratio=r,
                gedi_image=gedi_image,
                extra_image=extra_image)
            example = tf.train.Example(
                # Example contains a Features proto object
                features=tf.train.Features(
                    # Features has a map of string to Feature proto objects
                    feature=feature_dict))
            # Use the proto object to serialize the example to a string
            serialized = example.SerializeToString()
            # Write the serialized object to disk
            tfrecord_writer.write(serialized)
    # Calculate the ratio of positive to negative labels
    lab_counts = np.asarray(
        [np.sum(label_list == 0), np.sum(label_list == 1)]).astype(float)
    if ratio_list is not None:
        ratio = lab_counts / float(len(label_list))
        print('Data ratio is %s' % ratio)
    else:
        ratio = None
    np.savez(
        os.path.join(config.tfrecord_dir, k + '_' + config.max_file),
        max_array=max_array,
        min_array=min_array,
        ratio=ratio,
        filenames=files)
    return max_array, min_array
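
# Illustrative sketch (not from the original source): loading the intensity
# metadata saved by the writer above and deriving the training_max /
# training_min values that image_batcher's renormalize step expects. Assumes
# config.max_file already carries the .npz extension used by np.savez.
def _example_load_intensity_meta(config, k):
    meta_path = os.path.join(config.tfrecord_dir, k + '_' + config.max_file)
    meta = np.load(meta_path)
    training_max = np.max(meta['max_array']).astype(np.float32)
    training_min = np.min(meta['min_array']).astype(np.float32)
    print('Loaded %s: max=%s min=%s ratio=%s' % (
        meta_path, training_max, training_min, meta['ratio']))
    return training_max, training_min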
def extract_to_tf_records(
        files, label_list, ratio_list, output_pointer, config, k):
    """Extract paired-panel images as TF record files."""
    print('Building %s: %s' % (k, config.tfrecord_dir))
    max_array = []
    min_array = []
    count = 0
    with tf.python_io.TFRecordWriter(output_pointer) as tfrecord_writer:
        for idx, (f, l) in tqdm(
                enumerate(zip(files, label_list)), total=len(files)):
            if isinstance(config.channel, list):
                image = []
                for c in config.channel:
                    image += [produce_patch(
                        f,
                        c,
                        config.panel,
                        divide_panel=config.divide_panel,
                        max_value=config.max_gedi,
                        min_value=config.min_gedi,
                        matching=config.matching).astype(
                            np.float32)[None, :, :]]
                image = np.concatenate(image)
            else:
                image = produce_patch(
                    f,
                    config.channel,
                    config.panel,
                    divide_panel=config.divide_panel,
                    max_value=config.max_gedi,
                    min_value=config.min_gedi,
                    matching=config.matching).astype(np.float32)
            if image.shape[-1] == config.gedi_image_size[1] * 3:
                # Extract timepoint information from the filename tokens:
                # 0 = dataset
                # 1 = First panel timepoint
                # 2 = Well
                # 3 = Neuron number
                # 4 = Second panel timepoint
                # 5 - 17 = Third panel information
                split_tokens = f.split(os.path.sep)[-1].split('_')
                # Create two tfrecord entries: p1 vs p2 and p1 vs p3
                p1 = np.expand_dims(
                    image[:, :config.gedi_image_size[1]], axis=-1)
                f1 = ''.join(i for i in split_tokens[1] if i.isdigit())
                f1 = float(f1)
                p2 = np.expand_dims(
                    image[:, config.gedi_image_size[1]:config.gedi_image_size[1] * 2],
                    axis=-1)
                f2 = ''.join(i for i in split_tokens[4] if i.isdigit())
                f2 = float(f2)
                p3 = np.expand_dims(
                    image[:, config.gedi_image_size[1] * 2:config.gedi_image_size[1] * 3],
                    axis=-1)
                f1vf2d = f2 - f1
                f1vf3d = -1.
                # Create images
                p1vp3 = np.concatenate([p1, p3], axis=-1)
                p1vp2 = np.concatenate([p1, p2], axis=-1)

                # SAME
                max_array += [np.max(p1), np.max(p2)]
                # Construct the Example proto object
                feature_dict = features_to_dict(
                    label=1,  # Same cells
                    image=p1vp2,
                    filename=f,
                    ratio=f1vf2d)
                example = tf.train.Example(
                    # Example contains a Features proto object
                    features=tf.train.Features(
                        # Features has a map of string to Feature proto objects
                        feature=feature_dict))
                count += 1
                # Use the proto object to serialize the example to a string
                serialized = example.SerializeToString()
                # Write the serialized object to disk
                tfrecord_writer.write(serialized)

                # DIFFERENT
                max_array += [np.max(p1), np.max(p3)]
                # Construct the Example proto object
                feature_dict = features_to_dict(
                    label=0,  # Different cells
                    image=p1vp3,
                    filename=f,
                    ratio=f1vf3d)
                example = tf.train.Example(
                    # Example contains a Features proto object
                    features=tf.train.Features(
                        # Features has a map of string to Feature proto objects
                        feature=feature_dict))
                count += 1
                # Use the proto object to serialize the example to a string
                serialized = example.SerializeToString()
                # Write the serialized object to disk
                tfrecord_writer.write(serialized)
            else:
                print('Skipped image %s' % f)
    # Same/different pairs are balanced by construction
    ratio = [.5, .5]
    print('Data ratio is %s' % ratio)
    np.savez(
        os.path.join(config.tfrecord_dir, k + '_' + config.max_file),
        max_array=max_array,
        min_array=min_array,
        ratio=ratio,
        filenames=files)
    return max_array, min_array
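
# Illustrative sketch (not from the original source): driving the
# paired-panel writer above. The label and ratio arguments are only iterated
# over by this variant, so placeholder zeros / None are passed; the output
# naming convention is an assumption.
def _example_write_paired_panels(config, files, k='train'):
    output_pointer = os.path.join(config.tfrecord_dir, '%s.tfrecords' % k)
    dummy_labels = np.zeros(len(files))
    max_array, min_array = extract_to_tf_records(
        files=files,
        label_list=dummy_labels,
        ratio_list=None,
        output_pointer=output_pointer,
        config=config,
        k=k)
    # Two examples (same/different) are written per usable montage
    print('Wrote %s examples worth of metadata for %s files' % (
        len(max_array) // 2, len(files)))
    return max_array, min_array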