import glob
import math
import os
import pickle
import time

import h5py
import imageio
import numpy as np
import skimage.io
import sklearn.cluster
import sklearn.decomposition
import tensorflow as tf
import tqdm

# Project-local helpers (get_aps_data_hdf5, get_data, get_passenger_clusters,
# read_input_dir, SEGMENTATION_COLORS, _get_mask, passenger_clustering,
# common.pyelastix) are assumed to be importable from elsewhere in the repo.


def get_threat_heatmaps(mode):
    # Cached on disk: the 'done' flag marks a completed run, in which case the
    # heatmaps are read back from data.hdf5 instead of being regenerated.
    if not os.path.exists('done'):
        names, labels, x = get_aps_data_hdf5(mode)
        f = h5py.File('data.hdf5', 'w')
        th = f.create_dataset('th', x.shape + (3,))
        with read_input_dir('hand_labeling/threat_segmentation/base'):
            for i, (name, label, data) in tqdm.tqdm(enumerate(zip(names, labels, x)),
                                                    total=len(x)):
                files = glob.glob(name + '*')
                assert files, 'missing hand segmentation for %s' % name
                image = imageio.imread(files[0])
                # One binary mask per segmentation color.
                masks = [_get_mask(image, SEGMENTATION_COLORS[ci]) for ci in range(3)]
                with read_input_dir('hand_labeling/threat_segmentation/revision_v0'):
                    # Revised labelings override the corresponding base mask.
                    for revision in glob.glob(name + '*'):
                        rlabel = int(revision.split('_')[1].split('.')[0])
                        rci = [i + 1 for i in range(17) if label[i]].index(rlabel)
                        rimage = imageio.imread(revision)
                        masks[rci] = _get_mask(rimage, SEGMENTATION_COLORS[0])
                th[i] = np.stack(masks, axis=-1)
        open('done', 'w').close()
    else:
        f = h5py.File('data.hdf5', 'r')
        th = f['th']
    return th
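# `_get_mask` is used above but defined elsewhere in the repo. A minimal sketch
# of the expected behavior, assuming each threat zone is painted in a solid
# SEGMENTATION_COLORS entry and the mask is simply "pixels of that color".
# The name `_get_mask_sketch` is hypothetical, for illustration only:
def _get_mask_sketch(image, color):
    # image: (H, W, 3) uint8 array; color: length-3 RGB sequence.
    return np.all(image == np.asarray(color).reshape(1, 1, 3), axis=-1)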
def get_a3d_projection_data(mode, percentile):
    if not os.path.exists('done'):
        angles, width, height = 16, 512, 660
        tf.reset_default_graph()

        # The a3d volume is fed in at half resolution; each projection is taken
        # after rotating the volume to the current scan angle.
        data_in = tf.placeholder(tf.float32, [width // 2, width // 2, height // 2])
        angle = tf.placeholder(tf.float32, [])
        with tf.device('/cpu:0'):
            image = tf.contrib.image.rotate(data_in, -2 * math.pi * angle / angles)
        max_proj = tf.reduce_max(image, axis=1)
        mean_proj, var_proj = tf.nn.moments(image, axes=[1])
        std_proj = tf.sqrt(var_proj)
        # Depth map: index of the first voxel above the given intensity percentile.
        surf = image > tf.contrib.distributions.percentile(image, percentile, axis=1,
                                                           keep_dims=True)
        dmap = tf.cast(tf.argmax(tf.cast(surf, tf.int32), axis=1) / width, tf.float32)
        proj = tf.image.rot90(tf.stack([dmap, max_proj, mean_proj, std_proj], axis=-1))

        gen = get_data(mode, 'a3d')
        f = h5py.File('data.hdf5', 'w')
        dset = f.create_dataset('dset', (len(gen), angles, height // 2, width // 2, 5))
        names, labels, dset_in = get_aps_data_hdf5(mode)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i, (_, _, data) in enumerate(tqdm.tqdm(get_data(mode, 'a3d'))):
                # 2x2x2 mean-pool the volume down to half resolution.
                data = (data[::2, ::2, ::2] + data[::2, ::2, 1::2] +
                        data[::2, 1::2, ::2] + data[::2, 1::2, 1::2] +
                        data[1::2, ::2, ::2] + data[1::2, ::2, 1::2] +
                        data[1::2, 1::2, ::2] + data[1::2, 1::2, 1::2]) / 8
                for j in tqdm.trange(angles):
                    dset[i, j, ..., :-1] = sess.run(proj, feed_dict={data_in: data,
                                                                     angle: j})
                    # Last channel: the matching aps view, pooled 2x2 (sum of
                    # the four neighboring pixels).
                    dset[i, j, ..., -1] = (dset_in[i, ::2, ::2, j] +
                                           dset_in[i, ::2, 1::2, j] +
                                           dset_in[i, 1::2, ::2, j] +
                                           dset_in[i, 1::2, 1::2, j])
        f.close()
        with open('pkl', 'wb') as f:
            pickle.dump((names, labels), f)
        open('done', 'w').close()

    with open('pkl', 'rb') as f:
        names, labels = pickle.load(f)
    f = h5py.File('data.hdf5', 'r')
    dset = f['dset']
    return names, labels, dset
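# The eight-slice sum in get_a3d_projection_data is a 2x2x2 mean-pool. An
# equivalent reshape-based formulation (a sketch, assuming all three volume
# dimensions are even), which is easier to verify at a glance:
def _downsample_volume_sketch(volume):
    d0, d1, d2 = volume.shape
    return volume.reshape(d0 // 2, 2, d1 // 2, 2, d2 // 2, 2).mean(axis=(1, 3, 5))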
def get_passenger_groups(mode):
    assert not mode.startswith('test')
    clusters = get_passenger_clusters()
    names, _, _ = get_aps_data_hdf5(mode)
    # Map every scan name to the index of the hand-labeled cluster containing it.
    group = [None] * len(names)
    for i in range(len(group)):
        for j, cluster in enumerate(clusters):
            if names[i] in cluster:
                group[i] = j
    return group
def train_clustering_model(mode, duration):
    tf.reset_default_graph()

    # Logistic regression on the 27-channel distance features: for each pair of
    # scans, predict the probability that they show the same passenger.
    dmat_in = tf.placeholder(tf.float32, [None, None, 27])
    labels_in = tf.placeholder(tf.float32, [None, None])
    dmat = tf.reshape(dmat_in, [-1, 27])
    mean, var = tf.nn.moments(dmat, [0, 1])
    dmat = (dmat - mean) / tf.sqrt(var)
    labels = tf.reshape(labels_in, [-1])
    logits = tf.squeeze(tf.layers.dense(dmat, 1))
    logprob = -tf.nn.softplus(-logits)  # log sigmoid(logits)
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                                  logits=logits))
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train_step = optimizer.minimize(loss)

    saver = tf.train.Saver()
    model_path = os.getcwd() + '/model.ckpt'

    def predict(x):
        with tf.Session() as sess:
            saver.restore(sess, model_path)
            return sess.run(logprob, feed_dict={dmat_in: x})

    if os.path.exists('done'):
        return predict

    dmat_train = get_distance_matrix(mode)
    clusters = get_passenger_clusters()
    names, _, _ = get_aps_data_hdf5(mode)
    name_idx = {x: i for i, x in enumerate(names)}
    # Pairs of scans from the same hand-labeled cluster are the positives.
    labels_train = np.zeros(dmat_train.shape[:2])
    for cluster in clusters:
        for name1 in cluster:
            for name2 in cluster:
                i1, i2 = name_idx[name1], name_idx[name2]
                labels_train[i1, i2] = 1

    def train_model(sess, duration):
        # Train for the requested number of hours, then checkpoint.
        t0 = time.time()
        while time.time() - t0 < duration * 3600:
            sess.run(train_step, feed_dict={dmat_in: dmat_train,
                                            labels_in: labels_train})
        saver.save(sess, model_path)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_model(sess, duration)
    open('done', 'w').close()
    return predict
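# Usage sketch for the closure returned by train_clustering_model. The mode
# string and duration below are illustrative assumptions; `predict` returns
# log-probabilities, so exponentiate before thresholding:
def _same_passenger_sketch():
    predict = train_clustering_model('all', duration=0.5)  # duration in hours
    dmat = get_distance_matrix('all')
    logprob = predict(dmat)  # (n, n) log P(scans i and j share a passenger)
    return np.exp(logprob) > 0.5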
def naive_cluster_passengers(mode, n_clusters):
    names, _, x = get_aps_data_hdf5(mode)
    # Downsample the first view 8x in each direction, then PCA + k-means.
    images = x[:, ::8, ::8, 0].reshape((len(x), -1))
    reduced_data = sklearn.decomposition.PCA(n_components=128).fit_transform(images)
    kmeans = sklearn.cluster.KMeans(n_clusters).fit(reduced_data)
    clusters = kmeans.predict(reduced_data)
    # Write one folder of normalized thumbnails per cluster for visual inspection.
    for i in range(n_clusters):
        os.mkdir(str(i))
    for name, cluster, data in tqdm.tqdm(zip(names, clusters, x), total=len(x)):
        imageio.imsave('%s/%s.png' % (cluster, name),
                       data[..., 0] / data[..., 0].max())
def get_distance_matrix(mode):
    if not os.path.exists('done'):
        batch_size = 32
        tf.reset_default_graph()

        x1_in = tf.placeholder(tf.float32, [None, 660, 512, 16])
        x2_in = tf.placeholder(tf.float32, [None, 660, 512, 16])
        dist_mats = []
        # Three crops (full scan, top half, center strip), each compared at
        # nine resolutions, giving 27 distance channels in total.
        for feat in range(3):
            res = 512
            if feat == 0:
                x1, x2 = x1_in, x2_in
            elif feat == 1:
                x1, x2 = x1_in[:, :330, :, :], x2_in[:, :330, :, :]
            else:
                x1, x2 = x1_in[:, :, 128:384, :], x2_in[:, :, 128:384, :]
            x1 = tf.image.resize_images(x1, [res, res])
            x2 = tf.image.resize_images(x2, [res, res])
            for _ in range(9):
                n = 16 * res**2
                x1_v = tf.reshape(x1, [-1, n])
                x2_v = tf.transpose(tf.reshape(x2, [-1, n]))
                # Pairwise RMS distance via ||a-b||^2 = ||a||^2 - 2ab + ||b||^2.
                dots = tf.matmul(x1_v, x2_v)
                diff = (tf.reduce_sum(tf.square(x1_v), axis=1, keep_dims=True) -
                        2 * dots +
                        tf.reduce_sum(tf.square(x2_v), axis=0, keep_dims=True))
                dist = tf.sqrt(tf.maximum(diff / n, 0))
                dist_mats.append(dist)
                res //= 2
                x1 = tf.image.resize_images(x1, [res, res])
                x2 = tf.image.resize_images(x2, [res, res])
        dist_mat = tf.stack(dist_mats, axis=-1)

        _, _, dset = get_aps_data_hdf5(mode)
        dmat = np.zeros((len(dset), len(dset), 27))
        with tf.Session() as sess:
            for i in tqdm.trange(0, len(dset), batch_size):
                for j in tqdm.trange(0, len(dset), batch_size):
                    mat = sess.run(dist_mat, feed_dict={
                        x1_in: dset[i:i+batch_size],
                        x2_in: dset[j:j+batch_size]
                    })
                    dmat[i:i+batch_size, j:j+batch_size, :] = mat
        np.save('dmat.npy', dmat)
        open('done', 'w').close()
    else:
        dmat = np.load('dmat.npy')
    return dmat
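# get_distance_matrix relies on the identity ||a - b||^2 = ||a||^2 - 2 a.b +
# ||b||^2 to get all pairwise distances from a single matmul. A NumPy sketch of
# the same computation for reference (`_pairwise_rms_sketch` is hypothetical):
def _pairwise_rms_sketch(a, b):
    # a: (m, n), b: (k, n) -> (m, k) root-mean-square pairwise differences.
    n = a.shape[1]
    sq = (a**2).sum(axis=1, keepdims=True) - 2 * a @ b.T + (b**2).sum(axis=1)
    return np.sqrt(np.maximum(sq / n, 0))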
def get_data_and_threat_heatmaps(mode):
    names, labels, x = get_aps_data_hdf5(mode)
    if not os.path.exists('done'):
        th = get_threat_heatmaps(mode)
        f = h5py.File('data.hdf5', 'w')
        # Channel 0 is the raw scan; channels 1-3 are the threat masks.
        dset = f.create_dataset('dset', x.shape + (4,))
        for i, (data, hmap) in tqdm.tqdm(enumerate(zip(x, th)), total=len(x)):
            dset[i] = np.concatenate([data[..., np.newaxis], hmap], axis=-1)
        open('done', 'w').close()
    else:
        f = h5py.File('data.hdf5', 'r')
        dset = f['dset']
    return names, labels, dset
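# Sketch of consuming the combined dataset; the 'train' mode string is an
# illustrative assumption. Channel 0 is the raw scan, channels 1-3 the masks:
def _split_scan_and_masks_sketch():
    names, labels, dset = get_data_and_threat_heatmaps('train')
    scan = dset[0, ..., 0]    # (660, 512, 16) raw aps data
    masks = dset[0, ..., 1:]  # (660, 512, 16, 3) threat heatmaps
    return scan, masks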
def plot_nearest_neighbors(mode, max_near):
    perm = passenger_clustering.get_nearest_neighbors(mode)
    group = passenger_clustering.get_passenger_groups(mode)
    names, _, dset = get_aps_data_hdf5(mode)
    for i, name in enumerate(names):
        # Count neighbors whose hand-labeled group disagrees with this scan's.
        n_wrong = sum(group[perm[i][j]] != group[i] for j in range(max_near))
        images = [dset[perm[i][j], ::4, ::4, 0] for j in range(max_near)]
        # Tile the neighbors four to a row; the error count prefixes the filename.
        rows = [np.concatenate(images[j:j+4], axis=1) for j in range(0, max_near, 4)]
        image = np.concatenate(rows, axis=0)
        imageio.imsave('%s_%s.png' % (n_wrong, name), image / image.max())
def write_aps_hand_labeling_revision_v0(mode):
    names, _, x = get_aps_data_hdf5(mode)
    # Parse the revision list: each line is "<name> <label>, <label>, ...".
    todo = {}
    with read_input_dir('hand_labeling/threat_segmentation'):
        with open('revision_v0.txt', 'r') as f:
            for line in f:
                name, labels = line[:5], line[6:]
                assert name not in todo, 'duplicate revision names'
                todo[name] = [int(s) for s in labels.split(', ')]
    for name, data in tqdm.tqdm(zip(names, x), total=len(x)):
        for label in todo.get(name[:5], []):
            # Unroll the 16 views side by side into one wide image.
            images = np.concatenate(np.rollaxis(data, 2), axis=1) / data.max()
            filename = '%s_%s' % (name, label)
            skimage.io.imsave('%s.png' % filename,
                              np.repeat(images[..., np.newaxis], 3, axis=-1))
def plot_image_registration_samples(mode, n_samples):
    names, _, dset = get_aps_data_hdf5(mode)
    group = passenger_clustering.get_passenger_groups(mode)
    # Grid-search elastix settings: grid spacing x resolutions x iterations.
    for spacing in tqdm.tqdm([8, 16, 32, 64]):
        for num_res in tqdm.tqdm([2, 3, 4]):
            for num_iter in tqdm.tqdm([8, 16, 32, 64, 128]):
                np.random.seed(0)
                im1, im2 = [], []
                for i in range(n_samples):
                    # Sample pairs of scans from the same passenger group.
                    while True:
                        i1 = np.random.randint(len(dset))
                        i2 = np.random.randint(len(dset))
                        angle = np.random.randint(16)
                        if group[i1] == group[i2]:
                            break
                    d1, d2 = dset[i1, ..., angle], dset[i2, ..., angle]
                    d1 /= d1.max()
                    d2 /= d2.max()
                    im1.append(d1)
                    im2.append(d2)
                params = common.pyelastix.get_default_params()
                params.FinalGridSpacingInPhysicalUnits = spacing
                params.NumberOfResolutions = num_res
                params.MaximumNumberOfIterations = num_iter
                reg = passenger_clustering.register_images(im1, im2, params)
                for i, (d1, d2, im) in enumerate(zip(im1, im2, reg)):
                    im /= im.max()
                    # 2x2 panel: the two originals on top, the registered image
                    # bottom-left, and a red/green overlay of target vs. result
                    # bottom-right.
                    image = np.concatenate([
                        np.concatenate([d1, d2], axis=1),
                        np.concatenate([im, np.zeros(d1.shape)], axis=1)
                    ], axis=0)
                    image = np.repeat(image[..., np.newaxis], 3, axis=-1)
                    image[660:, 512:, 0] = d2
                    image[660:, 512:, 1] = im
                    path = '%s/%s/%s' % (spacing, num_res, num_iter)
                    if not os.path.exists(path):
                        os.makedirs(path)
                    imageio.imsave('%s/%s.png' % (path, i), image)
def write_passenger_id_images(mode):
    names, _, x = get_aps_data_hdf5(mode)
    for name, data in tqdm.tqdm(zip(names, x), total=len(x)):
        imageio.imsave('%s.png' % name, data[..., 0] / np.max(data[..., 0]))
def write_aps_hand_labeling_images(mode):
    names, labels, x = get_aps_data_hdf5(mode)
    for name, label, data in tqdm.tqdm(zip(names, labels, x), total=len(x)):
        # Unroll the 16 views side by side; positive zone numbers go into the
        # filename so labelers can check them against the image.
        images = np.concatenate(np.rollaxis(data, 2), axis=1) / data.max()
        filename = '_'.join([name] + [str(i+1) for i in range(17)
                                      if label and label[i]])
        skimage.io.imsave('%s.png' % filename,
                          np.repeat(images[..., np.newaxis], 3, axis=-1))