Пример #1
0
def train():
    """Train YOLOv3 with a static-graph executor and checkpoint improving epochs.

    The function initialises logging and ``train_parameters``, builds the
    training and startup programs, runs the startup program, loads the
    pretrained parameters and then trains for ``train_parameters["num_epochs"]``
    passes. After each pass the mean batch loss is compared with the best one
    seen so far; on improvement the persistable variables are written to
    ``train_parameters['save_model_dir']``. The persistables are saved once
    more after the last pass.

    A pass whose reader yields no batches is skipped with a warning instead of
    failing on a division by zero.
    """
    init_log_config()
    init_train_parameters()
    logger.info("start train YOLOv3, train params:%s", str(train_parameters))

    logger.info("create place, use gpu:" + str(train_parameters['use_gpu']))
    place = fluid.CUDAPlace(0) if train_parameters['use_gpu'] else fluid.CPUPlace()

    logger.info("build network and program")
    train_program = fluid.Program()
    start_program = fluid.Program()
    feeder, reader, loss = build_program_with_feeder(train_program, start_program, place)

    logger.info("build executor and init params")
    exe = fluid.Executor(place)
    exe.run(start_program)
    train_fetch_list = [loss.name]
    load_pretrained_params(exe, train_program)

    save_dir = train_parameters['save_model_dir']
    current_best_loss = float("inf")
    for pass_id in range(train_parameters["num_epochs"]):
        logger.info("current pass: {}, start read image".format(pass_id))
        # batch_count is 1-based, so after the loop it equals the number of
        # batches processed in this pass (0 when the reader was empty).
        batch_count = 0
        total_loss = 0.0
        for batch_count, data in enumerate(reader(), start=1):
            t1 = time.time()
            fetched = exe.run(train_program, feed=feeder.feed(data), fetch_list=train_fetch_list)
            period = time.time() - t1
            # Keep the fetched value separate from the `loss` graph variable.
            batch_loss = np.mean(np.array(fetched))
            total_loss += batch_loss

            if batch_count % 10 == 0:  # adjust the logging frequency here
                logger.info("pass {}, trainbatch {}, loss {} time {}".format(
                    pass_id, batch_count, batch_loss, "%2.2f sec" % period))

        if batch_count == 0:
            # Nothing was trained, so there is no mean loss to report or compare.
            logger.warning("pass {} got no data from reader, skip".format(pass_id))
            continue

        pass_mean_loss = total_loss / batch_count
        logger.info("pass {0} train result, current pass mean loss: {1}".format(pass_id, pass_mean_loss))
        # Checkpoint once per pass when the mean loss improves; this could be
        # replaced by a finer-grained saving strategy.
        if pass_mean_loss < current_best_loss:
            logger.info("temp save {} epcho train result, current best pass loss {}".format(pass_id, pass_mean_loss))
            fluid.io.save_persistables(dirname=save_dir,
                                       main_program=train_program,
                                       executor=exe)
            current_best_loss = pass_mean_loss

    logger.info("training till last epcho, end training")
    # NOTE(review): this writes to the same directory as the best-loss
    # checkpoint above, so the last pass replaces it even if it was not the
    # best one. Kept as in the original; confirm this is intended.
    fluid.io.save_persistables(dirname=save_dir,
                               main_program=train_program,
                               executor=exe)
Пример #2
0
@Version :   1.0
@Contact :   [email protected]
@License :   
@Desc    :   None
'''

# here put the import lib
import numpy as np
import config
import random
import os
from PIL import Image, ImageEnhance
import xml
import cv2

# Module-level training configuration, built once at import time by
# config.init_train_parameters().
train_parameters = config.init_train_parameters()


def box_to_center_relative(box, img_height, img_width):
    """
    将COCO数据集的标注框格式[x1, y1, w, h]转换成中心坐标模式[center_x, center_y, w, h]
    将绝对坐标值除以图片的H,W 归一化
    """
    assert len(box) == 4, "box should be a len(4) list or tuple"
    x, y, w, h = box

    x1 = max(x, 0)
    x2 = min(x + w - 1, img_width - 1)
    y1 = max(y, 0)
    y2 = min(y + h - 1, img_height - 1)
Пример #3
0
                avg_loss.backward()
                t4 = time.time()
                backward_time = t4 - t3
                optimizer.minimize(avg_loss)
                net.clear_gradients()
                # print(forward_time, backward_time)

                dy_param_value = {}
                for param in net.parameters():
                    dy_param_value[param.name] = param.numpy

                if batch_id % 40 == 0:
                    logger.info("Loss at epoch {} step {}: {}, acc: {}".format(epoch_num, batch_id, avg_loss.numpy(),
                                                                               acc.numpy()))

            net.eval()
            epoch_acc = eval_net(test_reader, net)
            net.train()
            if epoch_acc > best_acc:
                fluid.dygraph.save_dygraph(net.state_dict(), train_parameters["save_persistable_dir"])
                fluid.dygraph.save_dygraph(optimizer.state_dict(), train_parameters["save_persistable_dir"])
                best_acc = epoch_acc
                logger.info("model saved at epoch {}, best accuracy is {}".format(epoch_num, best_acc))
        logger.info("Final loss: {}".format(avg_loss.numpy()))


if __name__ == "__main__":
    # Script entry point: run the two initialisation helpers, then start
    # training. Nothing here runs when the module is imported.
    init_log_config()
    init_train_parameters()
    train()
Пример #4
0
def train():
    """Train the SSD detector with periodic checkpoints, evaluation and early stop.

    Builds a training program fed by an asynchronous reader and an evaluation
    program fed through a feeder, runs the startup program and loads the
    pretrained parameters. It then trains for up to
    ``train_parameters["num_epochs"]`` passes:

    * every 10 batches the loss is logged and printed;
    * every 400 batches (counted across passes) the persistables are saved;
    * on the first batch and every ``sample_frequency`` batches one evaluation
      batch is run. If its current mAP exceeds ``min_curr_map`` or the training
      loss is at most ``min_loss``, an inference model is exported and a
      success counter is incremented; otherwise the counter is reset. When the
      counter reaches ``successive_limit`` training stops early.

    ``save_model`` is always called at the end with the ``-final`` prefix. An
    evaluation step is skipped with a warning when the evaluation reader
    yields no data.
    """
    # Initialise the entries of train_parameters (class_dim and so on).
    init_train_parameters()
    print("start ssd, train params:", str(train_parameters))
    logger.info("start ssd, train params: %s", str(train_parameters))

    # Select the device to train on.
    logger.info("create place, use gpu:" + str(train_parameters['use_gpu']))
    place = fluid.CUDAPlace(
        0) if train_parameters['use_gpu'] else fluid.CPUPlace()

    # Create the programs.
    logger.info("build network and program")
    train_program = fluid.Program()
    start_program = fluid.Program()
    eval_program = fluid.Program()

    # Build the training program.
    train_reader, img, loss, locs, confs, box, box_var = build_train_program_with_async_reader(
        train_program, start_program)

    # Build the evaluation program.
    eval_feeder, eval_reader, cur_map, accum_map, nmsed_out = build_eval_program_with_feeder(
        eval_program, start_program, place)
    eval_program = eval_program.clone(for_test=True)

    logger.info("build executor and init params")
    # Create the executor and run the startup program.
    exe = fluid.Executor(place)
    exe.run(start_program)

    # Variables fetched during training and evaluation.
    train_fetch_list = [loss.name]
    eval_fetch_list = [cur_map.name, accum_map.name]

    # Load the pretrained MobileNet parameters into train_program.
    load_pretrained_params(exe, train_program)

    # Early-stop settings.
    stop_strategy = train_parameters['early_stop']
    successive_limit = stop_strategy['successive_limit']
    sample_freq = stop_strategy['sample_frequency']
    min_curr_map = stop_strategy['min_curr_map']
    min_loss = stop_strategy['min_loss']
    stop_train = False
    total_batch_count = 0
    successive_count = 0
    for pass_id in range(train_parameters["num_epochs"]):
        logger.info("current pass: %d, start read image", pass_id)
        batch_id = 0
        train_reader.start()
        try:
            # The loop ends either through the early-stop `break` below or
            # through EOFException when the reader is exhausted.
            while True:
                t1 = time.time()
                fetched = exe.run(train_program, fetch_list=train_fetch_list)
                period = time.time() - t1
                # Keep the fetched value separate from the `loss` graph variable.
                loss_value = np.mean(np.array(fetched))
                batch_id += 1
                total_batch_count += 1

                if batch_id % 10 == 0:  # report the loss every 10 batches
                    logger.info(
                        "Pass {0}, trainbatch {1}, loss {2} time {3}".format(
                            pass_id, batch_id, loss_value, "%2.2f sec" % period))
                    print("Pass {0}, trainbatch {1}, loss {2} time {3}".format(
                        pass_id, batch_id, loss_value, "%2.2f sec" % period))

                if total_batch_count % 400 == 0:  # checkpoint every 400 batches
                    logger.info("temp save {0} batch train result".format(
                        total_batch_count))
                    print("temp save {0} batch train result".format(
                        total_batch_count))
                    # Persist the program's variables into the given directory.
                    fluid.io.save_persistables(
                        dirname=train_parameters['save_model_dir'],
                        filename=train_parameters['model_prefix'] + '-retrain',
                        main_program=train_program,
                        executor=exe)

                if total_batch_count == 1 or total_batch_count % sample_freq == 0:
                    # Evaluate on the first batch of the evaluation reader only.
                    eval_batch = next(iter(eval_reader()), None)
                    if eval_batch is None:
                        # Without data there is no mAP to judge; skip rather
                        # than use undefined or stale evaluation results.
                        logger.warning(
                            "eval reader returned no data at batch %d, skip evaluation",
                            total_batch_count)
                        continue
                    cur_map_v, accum_map_v = exe.run(
                        eval_program,
                        feed=eval_feeder.feed(eval_batch),
                        fetch_list=eval_fetch_list)
                    logger.info(
                        "{0} batch train, cur_map:{1} accum_map_v:{2} loss:{3}"
                        .format(total_batch_count, cur_map_v[0],
                                accum_map_v[0], loss_value))
                    print(
                        "{0} batch train, cur_map:{1} accum_map_v:{2} loss:{3}"
                        .format(total_batch_count, cur_map_v[0],
                                accum_map_v[0], loss_value))
                    # The check counts as a success when the mAP exceeds the
                    # configured minimum or the loss is at most the configured
                    # minimum; successive_count is then incremented.
                    if cur_map_v[0] > min_curr_map or loss_value <= min_loss:
                        successive_count += 1
                        print("successive_count: ", successive_count)
                        fluid.io.save_inference_model(
                            dirname=train_parameters['save_model_dir'],
                            params_filename=train_parameters['model_prefix'] +
                            '-params',
                            model_filename=train_parameters['model_prefix'] +
                            '-model',
                            feeded_var_names=['img'],
                            target_vars=[nmsed_out],
                            main_program=eval_program,
                            executor=exe)
                        # Stop once successive_limit checks in a row succeeded.
                        if successive_count >= successive_limit:
                            logger.info("early stop, end training")
                            print("early stop, end training")
                            stop_train = True
                            break
                    else:
                        successive_count = 0
        except fluid.core.EOFException:
            # End of the pass: reset the reader so the next pass can start it.
            train_reader.reset()
        if stop_train:
            break

    if not stop_train:
        # Only report this when training really ran through every pass.
        logger.info("training till last epcho, end training")
        print("training till last epcho, end training")
    save_model(train_parameters['save_model_dir'],
               train_parameters['model_prefix'] + '-final', ['img'],
               [nmsed_out], train_program, eval_program, exe)