def test_serialise(self):
    """Build a randomly initialised network and check its serialised state.

    The dictionary returned by `serialise` must hold one bias and one
    weight entry per layer plus the layer count, and every entry must be
    identical to what the accessor methods report.
    """
    im_shape = Shape(3, 512, 512)
    layer_count, class_count = 7, 10

    # Default network: no initial values are supplied and training is off.
    network = orpac_net.Orpac(
        self.sess, im_shape, layer_count, class_count, None, False)
    self.sess.run(tf.global_variables_initializer())

    # Top level structure of the serialised state.
    state = network.serialise()
    assert isinstance(state, dict)
    assert set(state) == {'weight', 'bias', 'num-layers'}
    assert state['num-layers'] == layer_count

    # There must be exactly one entry per layer, indexed by layer number.
    for key in ('bias', 'weight'):
        assert set(state[key]) == set(range(network.numLayers()))

    # Serialised values must match the live variables.
    for layer in range(network.numLayers()):
        assert np.array_equal(network.getBias(layer), state['bias'][layer])
        assert np.array_equal(network.getWeight(layer), state['weight'][layer])
def loadRawData(self, path, ft_dim, num_samples):
    """Locate the training files in `path` and load the training set.

    Args:
        path: forwarded to `findTrainingFiles` together with `num_samples`.
        ft_dim: Shape
            Height and width of the feature map. NOTE: its `chan` attribute
            is overwritten in place with the channel count that matches the
            number of class labels.
        num_samples: forwarded to `findTrainingFiles`.

    Returns:
        im_dim: Shape
            Image shape (always 3 channels).
        ft_dim: Shape
            Dimensions of training data.
        int2name: dict[int:str]
            A LUT to translate machine labels to human readable strings.
            For instance {0: 'None', 1: 'Cube 0', 2: 'Cube 1'}.
        train: N-List[TrainingSample]
            Training data.
    """
    # Collect the training files and the label LUT they share.
    files = self.findTrainingFiles(path, num_samples)
    labels = self.getLabelData(files)
    label_count = len(labels)

    # Derive the image height/width that produces the requested feature
    # map size. The channel count is unknown at this point, hence `None`.
    ft_height_width = ft_dim.hw()
    image_shape = orpac_net.waveletToImageDim(Shape(None, *ft_height_width))

    # Images are always RGB. The feature channel count follows from the
    # number of classes.
    image_shape.chan = 3
    ft_dim.chan = orpac_net.Orpac.numOutputChannels(label_count)

    # Make sure all features exist, then load them alongside the images.
    self.compileMissingFeatures(files, ft_dim)
    samples = self.loadTrainingData(files, image_shape, ft_dim, label_count)

    return image_shape, ft_dim, labels, samples
def test_weights_and_biases(self):
    """Create default network and verify the shapes its accessors report.

    Checks the bias/weight shapes of the first and the last layer as well
    as the shape of the network output tensor.
    """
    im_dim = Shape(3, 512, 512)
    num_cls, num_layers = 10, 7

    # Create network with random weights.
    net = orpac_net.Orpac(self.sess, im_dim, num_layers, num_cls, None, False)
    self.sess.run(tf.global_variables_initializer())

    # First layer must be compatible with input.
    assert net.getBias(0).shape == (64, 1, 1)
    assert net.getWeight(0).shape == (3, 3, net._xin.shape[1], 64)

    # The last filter is responsible for creating the various features we
    # train the network on. Its dimension must be 33x33 to achieve a large
    # receptive field on the input image.
    # NOTE: these two comparisons used to lack the `assert` keyword, which
    # meant they were evaluated and silently discarded.
    num_ft_chan = net.outputShape().chan
    assert net.getBias(num_layers - 1).shape == (num_ft_chan, 1, 1)
    assert net.getWeight(num_layers - 1).shape == (33, 33, 64, num_ft_chan)

    # The output layer must have the correct number of features and
    # feature map size. This excludes the batch dimension.
    assert net.output().shape[1:] == net.outputShape().chw()
def test_restore(self):
    """Construct a network from explicit bias/weight values.

    This cannot share a test with `test_serialise` because of TFs
    idiosyncrasies with (not) sharing Tensor names. Instead, hand dummy
    values for a three layer network to the Ctor and verify that the
    network variables hold exactly those values afterwards.
    """
    im_shape = Shape(3, 512, 512)
    class_count, layer_count = 10, 3

    # Utility functions tell us the channel count of the output layer and
    # the dimensions of the (wavelet decomposed) network input.
    out_chan = orpac_net.Orpac.numOutputChannels(class_count)
    in_chan = orpac_net.imageToWaveletDim(im_shape).chan

    # (bias shape, weight shape) for the first, middle and last layer. The
    # first layer must fit the input, the middle one is fixed and the last
    # one must encode the features (ie BBox, isFg, Class). Every layer is
    # filled with its own index to make the values distinguishable.
    layer_shapes = [
        ((64, 1, 1), (3, 3, in_chan, 64)),
        ((64, 1, 1), (3, 3, 64, 64)),
        ((out_chan, 1, 1), (33, 33, 64, out_chan)),
    ]
    initial = {'bias': {}, 'weight': {}}
    for layer, (bias_shape, weight_shape) in enumerate(layer_shapes):
        initial['bias'][layer] = np.full(bias_shape, layer, np.float32)
        initial['weight'][layer] = np.full(weight_shape, layer, np.float32)
    initial['num-layers'] = 3

    # Create a new network from the initial values.
    network = orpac_net.Orpac(
        self.sess, im_shape, layer_count, class_count, initial, False)
    self.sess.run(tf.global_variables_initializer())

    # The variables must hold the specified values.
    for layer in range(network.numLayers()):
        assert np.array_equal(network.getBias(layer), initial['bias'][layer])
        assert np.array_equal(network.getWeight(layer), initial['weight'][layer])
def setup_class(cls):
    """Create the session, network and cost nodes shared by the tests.

    Stores `sess`, `net`, `y_pred_in`, `total_cost` and `y_true_in` on the
    class. The feature map is deliberately tiny (2x2) to keep the tests
    easy to write and debug.
    """
    class_count, layer_count = 10, 7

    # Image dimensions that result in a 2x2 feature map.
    feature_shape = Shape(None, 2, 2)
    image_shape = orpac_net.waveletToImageDim(feature_shape)

    # Tensorflow session and a non-trainable dummy network.
    cls.sess = tf.Session()
    cls.net = orpac_net.Orpac(
        cls.sess, image_shape, layer_count, class_count, None, train=False)
    assert cls.net.outputShape().hw() == (2, 2)

    # Placeholder with the same shape as the genuine network output. The
    # tests feed their own data into it to simulate network predictions.
    output_shape = cls.net.output().shape
    cls.y_pred_in = tf.placeholder(tf.float32, output_shape)

    # Build the cost nodes on top of the placeholder and fetch the `y_true`
    # node from the graph by its name.
    cls.total_cost = orpac_net.createCostNodes(cls.y_pred_in)
    graph = tf.get_default_graph()
    cls.y_true_in = graph.get_tensor_by_name('orpac-cost/y_true:0')
def main():
    """Train the ORPAC network, resuming from saved state if there is any.

    All state lives in the `netstate` folder: the configuration and log
    (`orpac-meta.pickle`), the serialised network (`orpac-net.pickle`) and
    the Tensorflow checkpoint. The state is written after every epoch.
    A KeyboardInterrupt ends the training loop without a traceback.
    """
    param = parseCmdline()
    sess = tf.Session()

    # File names.
    netstate_path = 'netstate'
    os.makedirs(netstate_path, exist_ok=True)
    fnames = {
        'meta': os.path.join(netstate_path, 'orpac-meta.pickle'),
        'orpac-net': os.path.join(netstate_path, 'orpac-net.pickle'),
        'checkpt': os.path.join(netstate_path, 'tf-checkpoint.pickle'),
    }
    del netstate_path

    # Restore the configuration if it exists, otherwise create a new one.
    print('\n----- Simulation Parameters -----')
    restore = os.path.exists(fnames['meta'])
    if restore:
        # NOTE: unpickling executes arbitrary code. This is only acceptable
        # because the files are produced by this very program.
        with open(fnames['meta'], 'rb') as fd:
            meta = pickle.load(fd)
        conf, log = meta['conf'], meta['log']
        with open(fnames['orpac-net'], 'rb') as fd:
            bw_init = pickle.load(fd)
    else:
        log = collections.defaultdict(list)
        conf = config.NetConf(
            seed=0, epoch=0, num_layers=7,
            path=os.path.join('data', '3dflight'),
            ft_dim=Shape(None, 64, 64),
            num_samples=None
        )
        bw_init = None

    # Report where the state came from (`None` for a fresh start). This
    # used to print a hard-coded `None` even after a successful restore.
    restored_from = fnames['meta'] if restore else None
    print(f'Restored from <{restored_from}>')
    print('\n', conf)

    # Load the BBox training data.
    print('\n----- Data Set -----')
    ds = data_loader.ORPAC(conf.path, conf.ft_dim, conf.num_samples, conf.seed)
    ds.printSummary()
    int2name = ds.int2name()
    num_classes = len(int2name)
    im_dim = ds.imageShape()

    # Input/output/parameter tensors for network.
    print('\n----- Network Setup -----')

    # Create input tensor and trainable ORPAC net.
    net = orpac_net.Orpac(sess, im_dim, conf.num_layers, num_classes, bw_init, True)

    # Initialise the TF graph.
    sess.run(tf.global_variables_initializer())

    # Ensure the network output shape matches the training output.
    assert net.outputShape() == ds.featureShape()
    print('Output feature map size: ', net.outputShape())

    # Restore the network from Tensorflow's checkpoint file.
    saver = tf.train.Saver()
    if restore:
        print('\nRestored Tensorflow graph from checkpoint file')
        saver.restore(sess, fnames['checkpt'])
    else:
        print('Starting with untrained network')

    print(f'\n----- Training for another {param.N} Epochs -----')
    try:
        # Epoch numbers continue from the last saved epoch. The learning
        # rate decays logarithmically from `lr0` to `lr1` over this cycle.
        epoch_ofs = conf.epoch + 1
        lrates = np.logspace(np.log10(param.lr0), np.log10(param.lr1), param.N)
        t0_all = time.time()

        for epoch, lrate in enumerate(lrates):
            t0_epoch = time.time()
            tot_epoch = epoch + epoch_ofs
            print(f'\nEpoch {tot_epoch} ({epoch+1}/{param.N} in this training cycle)')

            ds.reset()
            trainEpoch(ds, net, log, lrate)

            # Save the network state and log data. The context managers
            # guarantee the files are flushed and closed before we move on.
            with open(fnames['orpac-net'], 'wb') as fd:
                pickle.dump(net.serialise(), fd)
            conf = conf._replace(epoch=tot_epoch)
            meta = {'conf': conf, 'int2name': int2name, 'log': log}
            with open(fnames['meta'], 'wb') as fd:
                pickle.dump(meta, fd)
            saver.save(sess, fnames['checkpt'])

            # Determine training time for epoch
            etime = str(datetime.timedelta(seconds=int(time.time() - t0_epoch)))
            et_h, et_m, et_s = etime.split(':')
            etime_str = f'  Training time: {et_h}h {et_m}m {et_s}s'

            # Print basic stats about epoch.
            print(f'{etime_str}   Learning Rate: {lrate:.1E}')

        etime = str(datetime.timedelta(seconds=int(time.time() - t0_all)))
        et_h, et_m, et_s = etime.split(':')
        print(f'\nTotal training time: {et_h}h {et_m}m {et_s}s\n')
    except KeyboardInterrupt:
        # Deliberate: Ctrl-C is the regular way to stop a training run. The
        # state of the last completed epoch is already on disk.
        pass
class Orpac:
    """Convolutional network that predicts BBoxes, foreground and class.

    The network operates on a Wavelet decomposed RGB image. It consists of
    `num_layers - 1` hidden 3x3 convolution layers with ReLU activation,
    followed by a single 33x33 convolution output layer.

    The channels of an output tensor `y` are laid out as follows:
        y[:4]   BBox rectangle
        y[4:6]  is-foreground
        y[6:]   class label (one channel per class)

    Args:
        sess: Tensorflow session
        im_dim: Shape
            Dimensions of the input images.
        num_layers: int
            Total number of layers (hidden layers plus output layer).
        num_classes: int
            Number of classes in the data set.
        bw_init: dict or None
            Initial biases and weights. Passed to `unpackBiasAndWeight`.
        train: bool
            Create cost and optimiser nodes if True.
    """
    # Specify how many times to decompose the input image with Wavelets.
    _NUM_WAVELET_DECOMPOSITIONS = 3

    def __init__(self, sess, im_dim, num_layers, num_classes, bw_init, train):
        # Decide if we want to create cost nodes or not.
        assert isinstance(train, bool)

        # Backup basic variables.
        self._trainable = train
        self.sess = sess
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.im_dim = im_dim

        # Create placeholder variable for Wavelet decomposed image.
        self._xin = self._createInputTensor(im_dim)

        # Setup the NMS nodes and Orpac network.
        self._setupNonMaxSuppression()
        with tf.variable_scope('orpac'):
            self.out = self._setupNetwork(self._xin, bw_init, np.float32)

        # Store shape of the output tensor (without the batch dimension).
        self.ft_dim = Shape(*self.out.shape.as_list()[1:])

        # Define the cost nodes and compile them into a dictionary if this
        # network is trainable, otherwise do nothing.
        if self._trainable:
            self._cost_nodes, self._optimiser = self._addOptimiser()
        else:
            self._cost_nodes, self._optimiser = {}, None

    def session(self):
        """Return Tensorflow session"""
        return self.sess

    def getBias(self, layer):
        """Return the current value of the bias variable of `layer`."""
        g = tf.get_default_graph().get_tensor_by_name
        return self.sess.run(g(f'orpac/b{layer}:0'))

    def getWeight(self, layer):
        """Return the current value of the weight variable of `layer`."""
        g = tf.get_default_graph().get_tensor_by_name
        return self.sess.run(g(f'orpac/W{layer}:0'))

    def numLayers(self):
        """Return the total number of layers."""
        return self.num_layers

    def numClasses(self):
        """Return the number of classes."""
        return self.num_classes

    def outputShape(self):
        """Return the shape of the network output (exclusive Batch dimension).

        For example, the output may be Shape(chan=18, height=64, width=64).
        """
        # Sanity check: the number of output channels must match the value
        # returned by `numOutputChannels`.
        assert self.ft_dim.chan == self.numOutputChannels(self.numClasses())
        return self.ft_dim.copy()

    def imageShape(self):
        """Return a copy of the image shape passed to the constructor."""
        return self.im_dim.copy()

    def output(self):
        """Return the output tensor of the network."""
        return self.out

    def trainable(self):
        """Return True if the network has cost and optimiser nodes."""
        return self._trainable

    def costNodes(self):
        """Return a (shallow) copy of the cost node dictionary.

        The dictionary is empty if the network is not trainable.
        """
        return dict(self._cost_nodes)

    @staticmethod
    def numOutputChannels(num_classes: int):
        """Return the number of feature channels when there are `num_classes`.

        This value specifies the number of channels that the final network
        layer will return.

        NOTE: this returns the same value as `outputShape().chan` but does
        not require an Orpac instance since it is a static method.

        Input:
            num_classes: int
                The number of output channels depends on the number of
                classes in the data set. This variable specifies that number.

        Returns:
            int: number of channels in final network output layer.
        """
        # 4 BBox channels + 2 is-foreground channels + one per class.
        return 4 + 2 + num_classes

    @staticmethod
    def setBBoxRects(y, val):
        """Return a copy of `y` with the BBox channels `y[:4]` set to `val`."""
        y = np.array(y)
        assert y.ndim == 3
        assert np.array(val).shape == y[:4].shape
        y[:4] = val
        return y

    @staticmethod
    def getBBoxRects(y):
        """Return the BBox channels `y[:4]` of the 3D array `y`."""
        assert y.ndim == 3
        return y[:4]

    @staticmethod
    def setIsFg(y, val):
        """Return a copy of `y` with the isFg channels `y[4:6]` set to `val`."""
        y = np.array(y)
        assert y.ndim == 3
        assert np.array(val).shape == y[4:6].shape
        y[4:6] = val
        return y

    @staticmethod
    def getIsFg(y):
        """Return the is-foreground channels `y[4:6]` of the 3D array `y`."""
        assert y.ndim == 3
        return y[4:6]

    @staticmethod
    def setClassLabel(y, val):
        """Return a copy of `y` with the class channels `y[6:]` set to `val`."""
        y = np.array(y)
        assert y.ndim == 3
        assert np.array(val).shape == y[6:].shape
        y[6:] = val
        return y

    @staticmethod
    def getClassLabel(y):
        """Return the class label channels `y[6:]` of the 3D array `y`."""
        assert y.ndim == 3
        return y[6:]

    @staticmethod
    def _waveletInputDim(height, width, num_decompositions):
        """Return the (batch, chan, height, width) shape of the input tensor.

        Every decomposition halves height and width and quadruples the
        number of channels of the RGB image.
        """
        scale = 2 ** num_decompositions
        num_chan = 3 * (4 ** num_decompositions)
        return (1, num_chan, int(height / scale), int(width / scale))

    def _createInputTensor(self, im_dim):
        """Return the placeholder for the Wavelet decomposed input image."""
        # `hw()` returns (height, width). The two used to be unpacked in the
        # wrong order, which transposed the placeholder for non-square images.
        height, width = im_dim.hw()
        x_dim = self._waveletInputDim(
            height, width, self._NUM_WAVELET_DECOMPOSITIONS)
        return tf.placeholder(tf.float32, x_dim, name='x_in')

    def _addOptimiser(self):
        """Create cost and optimiser nodes.

        Returns:
            nodes: dict
                The cost tensors 'cls', 'bbox', 'isFg' and 'total'.
            opt: Adam optimiser operation that minimises the cost. Its
                learning rate is fed via the `lrate` placeholder.
        """
        cost = createCostNodes(self.out)
        g = tf.get_default_graph().get_tensor_by_name

        lrate_in = tf.placeholder(tf.float32, name='lrate')
        opt = tf.train.AdamOptimizer(learning_rate=lrate_in).minimize(cost)
        nodes = {
            'cls': g('orpac-cost/cls:0'),
            'bbox': g('orpac-cost/bbox:0'),
            'isFg': g('orpac-cost/isFg:0'),
            'total': g('orpac-cost/total:0'),
        }
        return nodes, opt

    def _imageToInput(self, img):
        """Return Wavelet decomposed `img`

        The returned tensor is compatible with this class' `_xin` placeholder.

        The image dimensions must match those returned by `imageShape`, ie. it
        must be square, RGB and all its dimension must be powers of 2.

        Each colour channel will be decomposed
        self._NUM_WAVELET_DECOMPOSITIONS times.

        Inputs:
            img: UInt8 Array[height, width, 3]

        Output:
            Array[1, *imageToWaveletDim(img_shape)]
            The output dimension depends on the number of decompositions and
            the input size. For a 512x512x3 image with 3 decompositions the
            output would have Shape(chan=192, height=64, width=64).
        """
        # Sanity check.
        assert isinstance(img, np.ndarray) and img.dtype == np.uint8
        im_dim = self.imageShape()
        assert img.shape == im_dim.hwc()
        assert im_dim.isSquare() and im_dim.isPow2()

        # Normalise the image and put each colour channel as a separate
        # image into a work list.
        img = img.astype(np.float32) / 255
        src = list(img.transpose([2, 0, 1]))

        # Decompose each channel.
        for i in range(self._NUM_WAVELET_DECOMPOSITIONS):
            # Target size of the coefficient images after this pass.
            N = im_dim.width >> (i + 1)

            # Apply wavelet transform to every image in the worklist and
            # place the results in an output list. NOTE: the work list is
            # consumed from its end. The resulting channel order is part of
            # the network input format and must not change.
            dst = []
            while len(src) > 0:
                cA, (cH, cV, cD) = pywt.dwt2(src.pop(), 'db2', mode='symmetric')

                # All coefficients must be square and have identical
                # dimensions.
                assert cA.shape == cH.shape == cV.shape == cD.shape
                assert cA.ndim == 2 and cA.shape[0] == cA.shape[1]

                # The wavelet decomposition reduces dimension by roughly 2.
                # However, due to transients the outputs are a bit larger
                # than that which is why we must trim them. Here we compute
                # the start/stop indices for the trimming.
                excess = cA.shape[0] - N
                assert excess >= 0
                a = excess // 2
                b = a + N
                assert b <= cA.shape[0]

                # Trim the coefficients.
                dst.append(cA[a:b, a:b])
                dst.append(cH[a:b, a:b])
                dst.append(cV[a:b, a:b])
                dst.append(cD[a:b, a:b])

            # Copy the output into the new work list and repeat the process.
            src = dst

        # Convert the Python list to Numpy and verify its shape.
        data = np.array(src, np.float32)
        assert data.shape == imageToWaveletDim(im_dim).chw()

        # Return the decomposed image with the leading batch dimension.
        return np.expand_dims(data, 0)

    def _setupNetwork(self, x_in, bw_init, dtype):
        """Create the convolution layers and return the output tensor.

        Inputs:
            x_in: input tensor in NCHW format.
            bw_init: passed to `unpackBiasAndWeight` for every layer.
            dtype: passed to `unpackBiasAndWeight` for every layer.

        Returns:
            Output tensor named `out`. It has as many channels as
            `numOutputChannels` specifies for the number of classes.
        """
        # Convenience: shared arguments for conv2d.
        opts = dict(padding='SAME', data_format='NCHW', strides=[1, 1, 1, 1])
        num_ft_chan = 64

        # Hidden conv layers.
        # Kernel: 3x3  Features: 64
        prev = x_in
        for i in range(self.num_layers - 1):
            prev_shape = tuple(prev.shape.as_list())
            b_dim = (num_ft_chan, 1, 1)
            W_dim = (3, 3, prev_shape[1], num_ft_chan)
            b, W = unpackBiasAndWeight(bw_init, b_dim, W_dim, i, dtype)
            prev = tf.nn.relu(tf.nn.conv2d(prev, W, **opts) + b)
        # NOTE: there used to be a `del` of the loop variables here. It
        # raised a NameError for networks without hidden layers because the
        # loop body never ran.

        # Conv output layer to learn the BBoxes and class labels.
        # Kernel: 33x33  Features: num_out_chan
        num_out_chan = self.numOutputChannels(self.num_classes)
        prev_shape = tuple(prev.shape.as_list())
        b_dim = (num_out_chan, 1, 1)
        W_dim = (33, 33, prev_shape[1], num_out_chan)
        b, W = unpackBiasAndWeight(bw_init, b_dim, W_dim, self.num_layers - 1, dtype)
        return tf.add(tf.nn.conv2d(prev, W, **opts), b, name='out')

    def _setupNonMaxSuppression(self):
        """Create non-maximum-suppression nodes.

        These are irrelevant for training but useful in the predictor to cull
        the flood of possible bounding boxes.
        """
        with tf.variable_scope('non-max-suppression'):
            r_in = tf.placeholder(tf.float32, [None, 4], name='bb_rects')
            s_in = tf.placeholder(tf.float32, [None], name='scores')
            # Keep at most 30 boxes. The overlap (IoU) threshold is 0.2.
            tf.image.non_max_suppression(r_in, s_in, 30, 0.2, name='op')

    def nonMaxSuppression(self, bb_rects, scores):
        """ Wrapper around Tensorflow's non-max-suppression function.

        Input:
            bb_rects: Array[N, 4]
                BBox rectangles, one per row.
            scores: Array[N]
                One scalar score for each BBox.

        Returns:
            idx: Array
                List of BBox indices that survived the operation.
        """
        g = tf.get_default_graph().get_tensor_by_name
        fd = {
            g('non-max-suppression/scores:0'): scores,
            g('non-max-suppression/bb_rects:0'): bb_rects,
        }
        return self.sess.run(g('non-max-suppression/op:0'), feed_dict=fd)

    def train(self, img, y, lrate, mask_cls, mask_bbox, mask_isFg):
        """Run a single optimisation step and return the costs.

        Inputs:
            img: UInt8 Array[height, width, 3]
                Training image. See `_imageToInput` for the constraints.
            y: Array[*outputShape().chw()]
                Expected network output.
            lrate: float
                Learning rate. Must be positive.
            mask_cls, mask_bbox, mask_isFg: Array[height, width]
                Masks for the cost nodes. Their shape must match the
                height and width of `y`.

        Returns:
            dict: value of every cost node (see `costNodes`).
        """
        assert self._trainable

        # Sanity checks
        assert lrate > 0
        assert mask_cls.shape == mask_bbox.shape == mask_isFg.shape
        assert y.shape == self.ft_dim.chw()
        assert y.shape[1:] == mask_cls.shape

        # Feed dictionary. The expected output needs a batch dimension.
        g = tf.get_default_graph().get_tensor_by_name
        fd = {
            self._xin: self._imageToInput(img),
            g('lrate:0'): lrate,
            g('orpac-cost/y_true:0'): np.expand_dims(y, 0),
            g('orpac-cost/mask_cls:0'): mask_cls,
            g('orpac-cost/mask_bbox:0'): mask_bbox,
            g('orpac-cost/mask_isFg:0'): mask_isFg,
        }

        # Run one optimisation step and return the costs.
        nodes = [self._cost_nodes, self._optimiser]
        costs, _ = self.sess.run(nodes, feed_dict=fd)
        return costs

    def predict(self, img):
        """Return the network output for `img` without the batch dimension.

        Inputs:
            img: UInt8 Array[height, width, 3]
        """
        # Run predictor network.
        g = tf.get_default_graph().get_tensor_by_name
        out = self.sess.run(g('orpac/out:0'), feed_dict={self._xin: self._imageToInput(img)})
        assert out.ndim == 4 and out.shape[0] == 1
        return out[0]

    def serialise(self):
        """Return the current biases and weights of all layers.

        Returns:
            dict: {'weight': {layer: Array}, 'bias': {layer: Array},
                   'num-layers': int}
        """
        out = {'weight': {}, 'bias': {}, 'num-layers': self.numLayers()}
        for i in range(self.num_layers):
            out['bias'][i] = self.getBias(i)
            out['weight'][i] = self.getWeight(i)
        return out