Example No. 1
    def set_samples(self, sample_bboxs):
        #pack the per-batch sample bboxs into a fixed-size float32 array,
        #upload it to the Theano shared variable and keep the original list
        timer = common.Timer()
        bboxs = self.get_bbox_array(sample_bboxs)
        self.sample_bbox.set_value(bboxs)
        self.sample_bbox_list = sample_bboxs
        logging.debug("Took %i ms to set_samples" % timer.current_ms())
        return bboxs
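
Note: set_value on a Theano shared variable uploads the new array to the device without recompiling any function that reads it. A minimal standalone sketch of that pattern, with illustrative shapes and names not taken from the original class:

import numpy
import theano

#shared buffer shaped like (batch, sample_num, sample_num, 4)
sample_bbox = theano.shared(numpy.zeros((2, 3, 3, 4), dtype=numpy.float32))

#replace the device-side copy in place; compiled functions see the new data
sample_bbox.set_value(numpy.ones((2, 3, 3, 4), dtype=numpy.float32))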
Example No. 2
def get_overlap_iou(obj_bboxs, sample_bboxs):
    #returns the (N, M) matrix of IoU overlaps between two lists of
    #(x0, y0, x1, y1) bboxs, compiling the Theano graph once on first use

    global overlap_func
    if overlap_func is None:
        logging.debug("Building overlap function")
        x_bboxs = tensor.matrix()
        y_bboxs = tensor.matrix()
        x_area = (x_bboxs[:, 2] - x_bboxs[:, 0]) * (x_bboxs[:, 3] -
                                                    x_bboxs[:, 1])
        y_area = (y_bboxs[:, 2] - y_bboxs[:, 0]) * (y_bboxs[:, 3] -
                                                    y_bboxs[:, 1])
        dx = tensor.maximum(
            tensor.minimum(x_bboxs[:, None, 2], y_bboxs[None, :, 2]) -
            tensor.maximum(x_bboxs[:, None, 0], y_bboxs[None, :, 0]), 0)
        dy = tensor.maximum(
            tensor.minimum(x_bboxs[:, None, 3], y_bboxs[None, :, 3]) -
            tensor.maximum(x_bboxs[:, None, 1], y_bboxs[None, :, 1]), 0)
        area_intersect = dx * dy
        area_union = (x_area[:, None] + y_area[None, :] - area_intersect)
        area_iou = area_intersect / area_union
        overlap_func = theano.function([x_bboxs, y_bboxs],
                                       area_iou,
                                       allow_input_downcast=True)

    if len(obj_bboxs) == 0 or len(sample_bboxs) == 0:
        return None
    else:
        x = numpy.array(obj_bboxs, dtype=numpy.float32)
        y = numpy.array(sample_bboxs, dtype=numpy.float32)
        return overlap_func(x, y)
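
For reference, the same pairwise IoU can be computed eagerly with plain NumPy broadcasting; this sketch (iou_numpy is not part of the original module) mirrors the compiled graph above:

import numpy

def iou_numpy(x_bboxs, y_bboxs):
    #pairwise IoU between (N, 4) and (M, 4) arrays of (x0, y0, x1, y1)
    #boxes; returns an (N, M) matrix
    x = numpy.asarray(x_bboxs, dtype=numpy.float32)
    y = numpy.asarray(y_bboxs, dtype=numpy.float32)
    x_area = (x[:, 2] - x[:, 0]) * (x[:, 3] - x[:, 1])
    y_area = (y[:, 2] - y[:, 0]) * (y[:, 3] - y[:, 1])
    dx = numpy.maximum(
        numpy.minimum(x[:, None, 2], y[None, :, 2]) -
        numpy.maximum(x[:, None, 0], y[None, :, 0]), 0)
    dy = numpy.maximum(
        numpy.minimum(x[:, None, 3], y[None, :, 3]) -
        numpy.maximum(x[:, None, 1], y[None, :, 1]), 0)
    inter = dx * dy
    return inter / (x_area[:, None] + y_area[None, :] - inter)

#e.g. iou_numpy([[0, 0, 2, 2]], [[1, 1, 3, 3]]) -> [[1/7]]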
Example No. 3
    def get_bbox_array(self, sample_bboxs):
        #flatten the per-batch sample bbox lists into a
        #(batch_size, sample_num, sample_num, 4) float32 array via the C helper
        timer = common.Timer()
        bboxs = numpy.zeros(
            (self.batch_size, self.sample_num, self.sample_num, 4),
            dtype=numpy.float32)
        c_code.build_bbox_array(sample_bboxs, bboxs)
        logging.debug("Took %i ms to get_bbox_array" % timer.current_ms())
        return bboxs
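
The exact cell ordering filled by c_code.build_bbox_array is not visible from this snippet. A rough pure-Python equivalent, assuming each entry of sample_bboxs[b] is a (corner, pr, bbox) tuple laid out with i = index % sample_num and j = index // sample_num as in Example No. 4, might look like:

def build_bbox_array_py(sample_bboxs, bboxs):
    #hypothetical fallback for the C helper; the real ordering may differ
    sample_num = bboxs.shape[1]
    for b, samples in enumerate(sample_bboxs):
        for index, (corner, pr, bbox) in enumerate(samples):
            i, j = index % sample_num, index // sample_num
            bboxs[b, j, i, :] = bbox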
Example No. 4
def debug_denet_export_targets(model, data_x, data_m, targets):
    logging.debug("DEBUGGING! Exporting targets")
    class_labels_inv = {v: k for k, v in model.class_labels.items()}
    for b in range(model.batch_size):

        #export ground truth
        for cls in set(data_m[b]["class"]):
            objs = []
            for obj_cls, obj in zip(data_m[b]["class"], data_m[b]["objs"]):
                if obj_cls == cls:
                    objs.append(obj)
            common.export_activation_rgb(
                "%06i_gt_%s.png" % (b, class_labels_inv[cls]),
                data_x[b, :, :, :], objs)

        #export targets
        for index, layer in enumerate(model.cost_layers):

            yt_index = targets[index * 2 + 0]
            yt_value = targets[index * 2 + 1]
            if layer.type_name == "denet-corner":
                corner_pr, = common.ndarray_unpack(yt_value,
                                                   [layer.corner_shape])
                common.export_activation("%06i_l%i_corner.png" % (b, index),
                                         corner_pr[b, 1, :, :, :])

            elif layer.type_name == "denet-detect":
                det_pr, = common.ndarray_unpack(yt_value, [layer.det_shape])
                for i, sample in enumerate(layer.sparse_layer.sample_bboxs[b]):
                    sample_i = i % layer.sparse_layer.sample_num
                    sample_j = i // layer.sparse_layer.sample_num
                    logging.debug("%i - sample:" % b, (sample_i, sample_j),
                                  "bbox:", (int(sample[2][0] * model.width),
                                            int(sample[2][1] * model.height),
                                            int(sample[2][2] * model.width),
                                            int(sample[2][3] * model.height)),
                                  "pr:", sample[1], "corner:", sample[0])

                common.export_activation("%06i_l%i_det.png" % (b, index),
                                         det_pr[b, :, :, :])
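
The modulo/division pair above unpacks a flat sample index into grid coordinates. For example, with sample_num = 4, index 6 lands in cell (sample_i, sample_j) = (2, 1):

sample_num = 4
i = 6
sample_i, sample_j = i % sample_num, i // sample_num  #-> (2, 1)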
Example No. 5
    def get_samples(self, data_x, train=False, store_shared=False):

        global profile
        if self.corner_func is None:
            logging.verbose("Building corner function - store samples:",
                            store_shared, "train:", train)
            updates = [(self.corner_layer.sample_shared,
                        self.corner_layer.sample)] if store_shared else []
            self.corner_func = theano.function(
                [self.model_input],
                self.corner_layer.corner_pr,
                updates=updates,
                profile=profile,
                givens=[(get_train(), tensor.cast(int(train), 'int8'))],
                on_unused_input='ignore')

        #find corners
        timer = common.Timer()
        logging.debug("Running corner function")
        corner_pr = self.corner_func(data_x)

        if profile:
            logging.debug("Profiling corner function")
            theano_util.profile(self.corner_func, 10, data_x)
            theano_util.export_graph("./corner.graph", self.corner_func)
            logging.debug("Done")
            exit(0)

        #build sampling bounding boxes
        timer.mark()
        logging.debug("Build samples (%i threads)" % self.thread_num)
        samples = c_code.build_samples(self.thread_num, corner_pr,
                                       self.corner_threshold, self.sample_num,
                                       self.corner_max, self.local_max,
                                       self.nms_threshold)

        timer.mark()
        logging.verbose(
            "Took %i ms to get_samples (%i model, %i build, %i max corners) " %
            (timer.current_ms(), timer.delta_ms(0), timer.delta_ms(1),
             self.corner_max))
        return samples
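
Example No. 5 relies on a common Theano idiom: compile the function lazily on first call and reuse the cached callable afterwards, since theano.function is expensive. A minimal standalone sketch of the idiom, with illustrative names:

import theano
import theano.tensor as tensor

_square_func = None

def square(x):
    #compile once on first call, then reuse the cached callable
    global _square_func
    if _square_func is None:
        v = tensor.matrix()
        _square_func = theano.function([v], v * v,
                                       allow_input_downcast=True)
    return _square_func(x)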
Example No. 6
def run_train_epoch(args, update_client, workers, model, train_data,
                    learn_rate):

    import model_cnn

    logging.info("Perform train...")
    batch_size_factor = args.batch_size_factor
    output_prefix = args.output_prefix
    model_dims = args.model_dims
    model_save_dt = args.model_save_dt * 60

    #update learning rates:
    for worker in workers:
        with worker.learn_rate.get_lock():
            worker.learn_rate.value = learn_rate

    #randomly shuffle data before each epoch, set seed to ensure each node has same data order
    random.seed(args.seed + update_client.epoch)
    train_data.shuffle()

    #perform initial sync so that all nodes have the same model
    model_update = shared.ModelUpdate(model_dims)
    model_update.import_updates(model)
    # update_client.sync(model_update, workers, initial=True)

    #get the index of the next subset to process
    subset_next = update_client.get_subset_next()

    #start export of data
    batch_size = len(workers) * model.batch_size * batch_size_factor
    logging.info(
        "SGD batch size is %ix%ix%i = %i" %
        (batch_size_factor, len(workers), model.batch_size, batch_size))
    export_thread = DatasetExportThread(model, train_data, subset_next,
                                        batch_size, True)

    #start processing
    total_cost = 0
    total_it = 0
    subset_current = subset_next
    epoch_current = update_client.epoch
    for worker in workers:
        worker.set_epoch(epoch_current)

    timer = common.Timer()
    timer_save = common.Timer()
    while subset_next >= 0:

        #wait until export is ready
        timer.reset()
        export_thread.wait()
        data_x, data_y, data_size = export_thread.get_export()
        subset_current = subset_next
        del export_thread
        if timer.current() > 1:
            logging.warning(
                "Warning: needed an additional %.1f seconds for dataset export"
                % timer.current())

        #print training classes for checking random seed etc
        logging.debug("Sample Metas: ", data_y[0:min(3, len(data_y))])

        #start exporting next subset
        subset_next = update_client.get_subset_next()
        if subset_next >= 0:
            export_thread = DatasetExportThread(model, train_data, subset_next,
                                                batch_size, True)

        # #store initial model before changes
        # model_update_delta = model_update.copy()

        logging.info("Evaluating training function")
        timer.reset()
        batch_num = data_x.shape[0] // model.batch_size
        it_num = batch_num // (len(workers) * batch_size_factor)
        index = 0
        subset_cost = 0

        while index < batch_num:

            total_ts = time.time()

            def train_worker_thread(worker, indexs):
                worker.wait()
                worker.model_write(model_update)
                worker.train_begin()
                for i in indexs:
                    dx = data_x[i * model.batch_size:(i + 1) *
                                model.batch_size]
                    dy = data_y[i * model.batch_size:(i + 1) *
                                model.batch_size]
                    worker.train_step(dx, dy)
                    worker.wait()
                worker.train_end()
                worker.model_read()
                worker.wait()

            threads = []
            for worker in workers:
                worker_indexs = []
                for _ in range(batch_size_factor):
                    if index < batch_num:
                        worker_indexs.append(index)
                        index += 1

                t = threading.Thread(target=train_worker_thread,
                                     args=(worker, worker_indexs))
                t.start()
                threads.append((t, time.time()))

            proc_ts = []
            for t, start_ts in threads:
                t.join()
                proc_ts.append(int(1000 * (time.time() - start_ts)))

            #average models between GPUs and print batch info
            combine_ts = time.time()
            batch_cost = 0
            model_update.set_mean_init()
            for worker in workers:
                model_update.set_mean_update(worker.model_update)
                with worker.cost.get_lock():
                    batch_cost += worker.cost.value
            model_update.set_mean_finish()
            batch_cost /= len(workers)
            subset_cost += batch_cost
            it_index = index // (len(workers) * batch_size_factor)
            combine_ts = int(1000 * (time.time() - combine_ts))

            logging.verbose("Processing times (ms):", proc_ts,
                            ", Combine time: %i ms" % combine_ts)
            logging.info(
                "Subset %i/%i, Batch It %i/%i" %
                (subset_current + 1, train_data.subset_num, it_index, it_num),
                "- Cost:", batch_cost,
                "Time: %i ms" % (1000 * (time.time() - total_ts)))

        logging.info(
            "Training subset %i took %0.1f sec, mean cost:" %
            (subset_current + 1, timer.current()), subset_cost / it_num)
        total_it += it_num
        total_cost += subset_cost

        #update with server (if one exists)
        model_update.export_updates(model)
        # model_update_delta.set_delta(model_update)
        # update_client.update(model_update_delta, model_update, workers)

        #save intermediate models
        if timer_save.current() > model_save_dt and model_save_dt > 0:
            model_cnn.save_to_file(
                model, output_prefix + "_epoch%03i_subset%03i.mdl.gz" %
                (epoch_current, subset_current + 1))
            timer_save.reset()

    #perform final sync so that all nodes have the same model
    update_client.sync(model_update, workers)

    #save final models
    model_cnn.save_to_file(
        model, output_prefix + "_epoch%03i_final.mdl.gz" % (epoch_current))

    return (total_cost / total_it)
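
The "average models between GPUs" step reduces each worker's parameters to their element-wise mean. The shared.ModelUpdate.set_mean_* methods are opaque here; a hedged NumPy equivalent of that init/update/finish reduction could be:

import numpy

def mean_updates(worker_params):
    #worker_params: one list of parameter arrays per worker
    acc = [numpy.zeros_like(p) for p in worker_params[0]]  #set_mean_init
    for params in worker_params:                           #set_mean_update
        for a, p in zip(acc, params):
            a += p
    return [a / len(worker_params) for a in acc]           #set_mean_finish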