def predict(flags, net: Net, framework: Framework):
    """Run batched inference over every input image in ``flags.imgdir``.

    Images are preprocessed in a thread pool, forwarded through ``net`` one at
    a time, and postprocessed in the pool again.

    Args:
        flags: Shared flag namespace (``batch``, ``imgdir``, ...).
        net: Compiled :obj:`Net` used for the forward pass.
        framework: :obj:`Framework` providing input filtering and pre/post-processing.

    Raises:
        FileNotFoundError: If ``flags.imgdir`` contains no valid input images.
    """
    log = get_logger()
    io = SharedFlagIO(flags, subprogram=True)
    # Read shared flags once; fall back to the caller-supplied flags.
    shared = io.read_flags()
    flags = shared if shared is not None else flags
    all_inps = [i for i in os.listdir(flags.imgdir) if framework.is_input(i)]
    if not all_inps:
        raise FileNotFoundError(f'Failed to find any images in {flags.imgdir}')
    batch = min(flags.batch, len(all_inps))
    n_batch = int(math.ceil(len(all_inps) / batch))
    img_path = partial(os.path.join, flags.imgdir)
    # Context manager terminates the pool's worker threads on exit instead of
    # leaking them (the original never closed the pool).
    with ThreadPool() as pool:
        for j in range(n_batch):
            start = j * batch
            stop = min(start + batch, len(all_inps))
            this_batch = all_inps[start:stop]
            # The final batch may be smaller than `batch`; log the real count.
            count = len(this_batch)
            log.info(f'Preprocessing {count} inputs...')
            with Timer() as t:
                x = pool.map(lambda inp: framework.preprocess(img_path(inp)),
                             this_batch)
            log.info(f'Done! ({count/t.elapsed_secs:.2f} inputs/s)')
            log.info(f'Forwarding {count} inputs...')
            with Timer() as t:
                x = [np.concatenate(net(np.expand_dims(i, 0)), 0) for i in x]
            log.info(f'Done! ({count/t.elapsed_secs:.2f} inputs/s)')
            log.info(f'Postprocessing {count} inputs...')
            with Timer() as t:
                postprocess = lambda i, pred: framework.postprocess(
                    pred, img_path(this_batch[i]))
                pool.map(lambda p: postprocess(*p), enumerate(x))
            log.info(f'Done! ({count/t.elapsed_secs:.2f} inputs/s)')
def train(data, class_weights, flags, net: Net, framework: Framework,
          manager: tf.train.CheckpointManager):
    """Train ``net`` over ``data``, reporting progress through shared flags.

    Saves a checkpoint every ``flags.save`` steps via ``manager`` and once more
    at the end if the last step did not coincide with a save.

    Args:
        data: Parsed annotation data to shuffle into batches.
        class_weights: Per-class loss weights passed to the shuffler.
        flags: Shared flag namespace (``epoch``, ``batch``, ``save``, ...).
        net: Compiled :obj:`Net`; called with ``training=True``.
        framework: :obj:`Framework` supplying ``shuffle``.
        manager: Checkpoint manager used for periodic saves.
    """
    log = get_logger()
    io = SharedFlagIO(flags, subprogram=True)
    # Read shared flags once; fall back to the caller-supplied flags.
    shared = io.read_flags()
    flags = shared if shared is not None else flags
    log.info('Building {} train op'.format(flags.model))
    goal = len(data) * flags.epoch
    first = True
    ckpt = False  # guards the final save even if the dataset is empty
    for i, (x_batch, loss_feed) in enumerate(
            framework.shuffle(data, class_weights)):
        loss = net(x_batch, training=True, **loss_feed)
        step = net.step.numpy()
        lr = net.optimizer.learning_rate.numpy()
        line = 'step: {} loss: {:f} lr: {:.2e} progress: {:.2f}%'
        if not first:
            flags.progress = i * flags.batch / goal * 100
            log.info(line.format(step, loss, lr, flags.progress))
        else:
            log.info(f"Following gradient from step {step}...")
        io.send_flags()
        # A failed read must not clobber the working flags with None.
        shared = io.read_flags()
        if shared is not None:
            flags = shared
        ckpt = bool(not step % flags.save)
        if ckpt and not first:
            save = manager.save()
            log.info(f"Saved checkpoint: {save}")
        first = False
    if not ckpt:
        save = manager.save()
        log.info(f"Finished training at checkpoint: {save}")
def annotate(flags, net, framework):
    """Annotate each video in ``flags.video``, writing one CSV per video.

    Each CSV row is ``[timestamp_seconds, *box_result]`` for every detection
    above ``flags.threshold``. An existing annotation file is overwritten.
    Progress is published through shared flags every 10 frames; if
    ``flags.kill`` is set the process exits with status 1.

    Args:
        flags: Shared flag namespace (``video``, ``threshold``, ``kill``, ...).
        net: Forward-pass callable producing raw network output.
        framework: Provides ``resize_input``, ``findboxes`` and ``process_box``.
    """
    log = get_logger()
    io = SharedFlagIO(flags, subprogram=True)
    # Read shared flags once; fall back to the caller-supplied flags.
    shared = io.read_flags()
    flags = shared if shared is not None else flags
    for video in flags.video:
        frame_count = 0
        capture = cv2.VideoCapture(video)
        total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        annotation_file = f'{os.path.splitext(video)[0]}_annotations.csv'
        if os.path.exists(annotation_file):
            log.info("Overwriting existing annotations")
            os.remove(annotation_file)
        log.info(f'Annotating {video}')
        with open(annotation_file, mode='a') as file:
            file_writer = csv.writer(file, delimiter=',', quotechar='"',
                                     quoting=csv.QUOTE_MINIMAL)
            while capture.isOpened():
                frame_count += 1
                if frame_count % 10 == 0:
                    flags.progress = round(100 * frame_count / total_frames, 0)
                    io.io_flags()
                ret, frame = capture.read()
                if not ret:
                    # end of stream / read failure: stop this video
                    break
                frame = np.asarray(frame)
                h, w, _ = frame.shape
                im = framework.resize_input(frame)
                this_inp = np.expand_dims(im, 0)
                boxes = framework.findboxes(
                    np.concatenate(net(this_inp), 0))
                pred = filter(None, (
                    framework.process_box(b, h, w, flags.threshold)
                    for b in boxes))
                time_elapsed = capture.get(cv2.CAP_PROP_POS_MSEC) / 1000
                # plain loop: writerow is a side effect, not a value
                for result in pred:
                    file_writer.writerow([time_elapsed, *result])
                if flags.kill:
                    capture.release()
                    raise SystemExit(1)
        capture.release()
class NetBuilder(tf.Module):
    """Initializes with flags that build a Darknet or with a prebuilt Darknet.

    Constructs the actual :obj:`Net` object upon being called.
    """

    def __init__(self, flags, darknet=None):
        super(NetBuilder, self).__init__(name=self.__class__.__name__)
        tf.autograph.set_verbosity(0)
        self.io = SharedFlagIO(subprogram=True)
        # Read shared flags once; fall back to the caller-supplied flags.
        shared = self.io.read_flags()
        self.flags = shared if shared is not None else flags
        self.io_flags = self.io.io_flags
        self.logger = get_logger()
        self.darknet = Darknet(flags) if darknet is None else darknet
        self.num_layer = self.ntrain = len(self.darknet.layers) or 0
        self.meta = self.darknet.meta

    def __call__(self):
        """Build everything and return ``(net, framework, manager)``."""
        self.global_step = tf.Variable(0, trainable=False)
        framework = Framework.create(self.darknet.meta, self.flags)
        self.annotation_data, self.class_weights = framework.parse()
        optimizer = self.build_optimizer()
        layers = self.compile_darknet()
        net = Net(layers, self.global_step, dtype=tf.float32)
        ckpt_kwargs = {'net': net, 'optimizer': optimizer}
        self.checkpoint = tf.train.Checkpoint(**ckpt_kwargs)
        name = f"{self.meta['name']}"
        manager = tf.train.CheckpointManager(self.checkpoint,
                                             self.flags.backup,
                                             self.flags.keep,
                                             checkpoint_name=name)
        # try to load a checkpoint from flags.load
        self.load_checkpoint(manager)
        self.logger.info('Compiling Net...')
        net.compile(loss=framework.loss, optimizer=optimizer)
        return net, framework, manager

    def build_optimizer(self):
        """Create the optimizer selected by ``flags.trainer`` with a cyclic LR."""
        # setup kwargs for trainer
        kwargs = dict()
        if self.flags.trainer in MOMENTUM_USERS:
            kwargs.update({MOMENTUM: self.flags.momentum})
        # Use equality, not identity: flags.trainer is parsed at runtime and
        # is not guaranteed to be the same string object as the constant
        # (the TF1 implementation elsewhere in this file compares with ==).
        if self.flags.trainer == NESTEROV:
            kwargs.update({self.flags.trainer: True})
        if self.flags.trainer == AMSGRAD:
            kwargs.update({AMSGRAD.lower(): True})
        if self.flags.clip:
            kwargs.update({'clipnorm': self.flags.clip_norm})
        ssc = self.flags.step_size_coefficient
        step_size = int(ssc * (len(self.annotation_data) // self.flags.batch))
        clr_kwargs = {
            'global_step': self.global_step,
            'mode': self.flags.clr_mode,
            'step_size': step_size,
            'learning_rate': self.flags.lr,
            'max_lr': self.flags.max_lr,
            'name': self.flags.model
        }
        # setup trainer
        return TRAINERS[self.flags.trainer](
            learning_rate=lambda: clr(**clr_kwargs), **kwargs)

    def compile_darknet(self):
        """Chain darknet layers into ops; each op receives the previous one."""
        layers = list()
        roof = self.num_layer - self.ntrain
        prev = None
        for i, layer in enumerate(self.darknet.layers):
            layer = op_create(layer, prev, i, roof)
            layers.append(layer)
            prev = layer
        return layers

    def load_checkpoint(self, manager):
        """Restore weights per ``flags.load``: substring match, -1 for latest,
        or a 1-based index into ``manager.checkpoints``; 0 starts from scratch.

        Raises:
            ValueError: If a string ``flags.load`` matches zero or multiple
                checkpoints.
        """
        if isinstance(self.flags.load, str):
            matches = [i for i in manager.checkpoints if self.flags.load in i]
            # raise (not assert) so validation survives python -O
            if len(matches) != 1:
                raise ValueError(
                    f"Expected exactly one checkpoint matching "
                    f"'{self.flags.load}', found {len(matches)}")
            # restore() takes a single checkpoint path, not the match list
            self.checkpoint.restore(matches[0])
            self.logger.info(f"Restored from {matches[0]}")
        elif self.flags.load < 0:
            self.checkpoint.restore(manager.latest_checkpoint)
            self.logger.info(f"Restored from {manager.latest_checkpoint}")
        elif self.flags.load >= 1:
            idx = self.flags.load - 1
            self.checkpoint.restore(manager.checkpoints[idx])
            self.logger.info(f"Restored from {manager.checkpoints[idx]}")
        else:
            self.logger.info("Initializing network weights from scratch.")
import os
import sys

sys.path.append(os.getcwd())

from beagles.io.flags import SharedFlagIO
from beagles.backend.net import NetBuilder, train, predict, annotate


def main():
    """Build the network then dispatch to train, annotate, or predict."""
    io = SharedFlagIO(subprogram=True)
    flags = io.read_flags()
    flags.started = True
    builder = NetBuilder(flags=flags)
    net, framework, manager = builder()
    flags = io.read_flags()
    if flags.train:
        train(builder.annotation_data, builder.class_weights, flags, net,
              framework, manager)
    elif flags.video:
        annotate(flags, net, framework)
    else:
        predict(flags, net, framework)
    # signal completion back to the parent process
    flags = io.read_flags()
    flags.progress = 100.0
    flags.done = True
    io.io_flags()
    exit(0)


if __name__ == '__main__':
    main()
class TFNet:
    """Graph-mode (TF1-compat) network wrapper.

    Builds the forward pass from a Darknet definition, sets up session,
    saver/summary machinery, checkpoint loading, and (when training) the
    train op.
    """

    # Interface Methods:
    def __init__(self, flags, darknet=None):
        self.io = SharedFlagIO(subprogram=True)
        # disable eager mode for TF1-dependent code
        tf.compat.v1.disable_eager_execution()
        # Read shared flags once; fall back to the caller-supplied flags.
        shared = self.io.read_flags()
        self.flags = shared if shared is not None else flags
        self.io_flags = self.io.io_flags
        self.logger = get_logger()
        darknet = Darknet(flags) if darknet is None else darknet
        self.ntrain = len(darknet.layers)
        self.darknet = darknet
        self.num_layer = len(darknet.layers)
        self.framework = Framework.create(darknet.meta, flags)
        self.annotation_data = self.framework.parse()
        self.meta = darknet.meta
        self.graph = tf.Graph()
        device_name = flags.gpu_name if flags.gpu > 0.0 else None
        start = time.time()
        with tf.device(device_name):
            with self.graph.as_default():
                self.build_forward()
                self.setup_meta_ops()
        self.logger.info('Finished in {}s'.format(time.time() - start))

    def raise_error(self, error: Exception, traceback=None):
        """Publish *error* to the shared flags, log it, and re-raise.

        Args:
            error: Exception instance to raise and report.
            traceback: Optional object carrying the original TF error message
                in a ``message`` attribute.
        """
        form = "{}\nOriginal Tensorflow Error: {}"
        try:
            raise error
        except Exception as e:
            if traceback:
                oe = traceback.message
                self.flags.error = form.format(str(e), oe)
            else:
                self.flags.error = str(e)
            self.logger.error(str(e))
            self.io.send_flags()
            raise

    def build_forward(self):
        """Construct the forward graph, layer by layer, logging each op."""
        # Placeholders
        inp_size = self.meta['inp_size']
        self.inp = tf.keras.layers.Input(dtype=tf.float32,
                                         shape=tuple(inp_size),
                                         name='input')
        self.feed = dict()  # other placeholders
        # Build the forward pass
        state = identity(self.inp)
        roof = self.num_layer - self.ntrain
        self.logger.info(LINE)
        self.logger.info(HEADER)
        self.logger.info(LINE)
        for i, layer in enumerate(self.darknet.layers):
            scope = '{}-{}'.format(str(i), layer.type)
            args = [layer, state, i, roof, self.feed]
            state = op_create(*args)
            mess = state.verbalise()
            msg = mess if mess else LINE
            self.logger.info(msg)
        self.top = state
        self.out = tf.identity(state.out, name='output')

    def setup_meta_ops(self):
        """Configure device placement, session, saver, summaries and loading."""
        tf.config.set_soft_device_placement(False)
        tf.debugging.set_log_device_placement(False)
        utility = min(self.flags.gpu, 1.)
        if utility > 0.0:
            tf.config.set_soft_device_placement(True)
        else:
            self.logger.info('Running entirely on CPU')
        if self.flags.train:
            self.build_train_op()
        if self.flags.summary:
            self.summary_op = tf.compat.v1.summary.merge_all()
            self.writer = tf.compat.v1.summary.FileWriter(
                self.flags.summary + self.flags.project_name)
        self.sess = tf.compat.v1.Session()
        self.sess.run(tf.compat.v1.global_variables_initializer())
        if not self.ntrain:
            return
        try:
            self.saver = tf.compat.v1.train.Saver(
                tf.compat.v1.global_variables())
            if self.flags.load != 0:
                self.load_from_ckpt()
        except tf.errors.NotFoundError as e:
            self.flags.error = str(e.message)
            # send through the shared-flag IO object; the class defines no
            # send_flags method of its own (the original called
            # self.send_flags(), which would raise AttributeError)
            self.io.send_flags()
            raise
        if self.flags.summary:
            self.writer.add_graph(self.sess.graph)

    def load_from_ckpt(self):
        """Restore from ``flags.backup``; ``flags.load < 0`` means latest."""
        if self.flags.load < 0:  # load lastest ckpt
            with open(os.path.join(self.flags.backup, 'checkpoint'), 'r') as f:
                last = f.readlines()[-1].strip()
                load_point = last.split(' ')[1]
                load_point = load_point.split('"')[1]
                # demoted from a stray print() to a debug log entry
                self.logger.debug(load_point)
                load_point = load_point.split('-')[-1]
                self.flags.load = int(load_point)
        load_point = os.path.join(self.flags.backup, self.meta['name'])
        load_point = '{}-{}'.format(load_point, self.flags.load)
        self.logger.info('Loading from {}'.format(load_point))
        try:
            self.saver.restore(self.sess, load_point)
        except ValueError:
            self.load_old_graph(load_point)

    def load_old_graph(self, ckpt):
        """Variable-by-variable restore from an old-format checkpoint.

        Args:
            ckpt: Checkpoint path handed to the :obj:`Loader` factory.
        """
        ckpt_loader = Loader.create(ckpt)
        self.logger.info(old_graph_msg.format(ckpt))
        for var in tf.compat.v1.global_variables():
            name = var.name.split(':')[0]
            args = [name, var.get_shape()]
            val = ckpt_loader(*args)
            if val is None:
                self.raise_error(VariableIsNone(var))
            shp = val.shape
            plh = tf.compat.v1.placeholder(tf.float32, shp)
            op = tf.compat.v1.assign(var, plh)
            self.sess.run(op, {plh: val})

    def build_train_op(self):
        """Create loss, cyclic-LR optimizer, and the gradient-apply train op."""
        self.framework.loss(self.out)
        self.logger.info('Building {} train op'.format(self.meta['model']))
        self.global_step = tf.Variable(0, trainable=False)
        # setup kwargs for trainer
        kwargs = dict()
        if self.flags.trainer in ['momentum', 'rmsprop', 'nesterov']:
            kwargs.update({'momentum': self.flags.momentum})
        if self.flags.trainer == 'nesterov':
            kwargs.update({self.flags.trainer: True})
        if self.flags.trainer == 'AMSGrad':
            kwargs.update({self.flags.trainer.lower(): True})
        if self.flags.clip:
            kwargs.update({'clipnorm': self.flags.clip_norm})
        # setup cyclic_learning_rate args
        ssc = self.flags.step_size_coefficient
        step_size = int(ssc * (len(self.annotation_data) // self.flags.batch))
        clr_kwargs = {
            'global_step': self.global_step,
            'mode': self.flags.clr_mode,
            'step_size': step_size,
            'learning_rate': self.flags.lr,
            'max_lr': self.flags.max_lr,
            'name': 'learning-rate'
        }
        # setup trainer
        self.optimizer = TRAINERS[self.flags.trainer](clr(**clr_kwargs),
                                                      **kwargs)
        # gradients for all globals except the trailing global_step variable
        train_vars = tf.compat.v1.global_variables()[:-1]
        # NOTE(review): this call was commented out upstream while `grads`
        # was still consumed below, a guaranteed NameError. Restored; confirm
        # self.framework.loss is the value get_gradients expects here.
        grads = self.optimizer.get_gradients(self.framework.loss, train_vars)
        self.train_op = self.optimizer.apply_gradients(zip(grads, train_vars))