示例#1
0
def pairwise_stats():
    """Gather pairwise joint statistics over the dataset and save them.

    Draws ``dataset.num_images`` batches (twice as many when ``cfg.mirror``
    is set), concatenates the ``pairwise_stats`` entries of every batch's
    data item per joint pair, then hands the per-pair mean and standard
    deviation (taken along axis 0) to ``save_stats``.
    """
    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(True)
    dataset.set_pairwise_stats_collect(True)

    total = dataset.num_images
    collected = {}

    if cfg.mirror:
        total *= 2

    for idx in range(total):
        print('processing image {}/{}'.format(idx, total - 1))

        per_image = dataset.next_batch()[Batch.data_item].pairwise_stats
        for pair in per_image:
            # Create the per-pair list on first sight, then append in place.
            collected.setdefault(pair, []).extend(per_image[pair])

    stats = {
        pair: {
            "mean": np.mean(diffs, axis=0),
            "std": np.std(diffs, axis=0),
        }
        for pair, diffs in collected.items()
    }

    save_stats(stats, cfg)
def pairwise_stats():
    """Compute and save the mean/std of pairwise differences per joint pair.

    Every batch's data item exposes ``pairwise_stats``; the values for each
    joint pair are accumulated over ``dataset.num_images`` batches (doubled
    when ``cfg.mirror`` is truthy) and summarised with ``np.mean`` and
    ``np.std`` along axis 0 before being passed to ``save_stats``.
    """
    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(True)
    dataset.set_pairwise_stats_collect(True)

    batch_count = dataset.num_images
    accumulated = {}

    if cfg.mirror:
        batch_count *= 2

    last_index = batch_count - 1
    for step in range(batch_count):
        print('processing image {}/{}'.format(step, last_index))

        item = dataset.next_batch()[Batch.data_item]
        frame_stats = item.pairwise_stats
        for pair in frame_stats:
            bucket = accumulated.get(pair)
            if bucket is None:
                bucket = accumulated[pair] = []
            bucket.extend(frame_stats[pair])

    stats = {}
    for pair, samples in accumulated.items():
        summary = {}
        summary["mean"] = np.mean(samples, axis=0)
        summary["std"] = np.std(samples, axis=0)
        stats[pair] = summary

    save_stats(stats, cfg)
示例#3
0
def train():
    """Train the pose network using the loaded configuration.

    Builds the preloading queue and the network losses, registers one scalar
    summary per loss, restores the ``resnet_v1`` variables from
    ``cfg.init_weights`` and then runs ``int(cfg.multi_step[-1][1]) + 1``
    training iterations.  The mean loss since the previous report is logged
    every ``cfg.display_iters`` iterations, and a snapshot is written every
    ``cfg.save_iters`` iterations as well as at the final iteration.
    """
    setup_logging()

    cfg = load_config()
    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']
    print("total_loss:", total_loss)
    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()
    print("merged_summaries:", merged_summaries)

    # Two savers: one limited to the "resnet_v1" variables for loading the
    # initial weights, one (keeping at most 5 checkpoints) for snapshots.
    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
    restorer = tf.train.Saver(variables_to_restore)
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk (after initialisation so that the restored
    # values are not overwritten by the initialisers).
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0
    # Number of losses summed into cum_loss since the last report.  Dividing
    # by this (instead of display_iters) keeps the very first report, which
    # covers a single iteration, from being under-reported.
    iters_since_report = 0
    lr_gen = LearningRate(cfg)
    for it in range(max_iter + 1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val,
         summary] = sess.run([train_op, total_loss, merged_summaries],
                             feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        iters_since_report += 1
        train_writer.add_summary(summary, it)
        if it % display_iters == 0:
            average_loss = cum_loss / iters_since_report
            cum_loss = 0.0
            iters_since_report = 0
            logging.info("iteration: {} loss: {} lr: {}".format(
                it, "{0:.4f}".format(average_loss), current_lr))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    # NOTE(review): the session is closed before the preloading thread is
    # asked to stop; this order is kept from the original — confirm before
    # reordering, the loader thread may be blocked inside the session.
    sess.close()
    coord.request_stop()
    coord.join([thread])
def main(option):
    """Compare poses between two camera frames in a live display loop.

    Loads the multi-person configuration, sets up the pose predictor, then
    repeatedly reads two frames, runs ``run_predict`` on half-size copies,
    compares the resulting slopes with ``compare_images`` and passes the
    outcome to ``vibrate``.  The loop ends when ``q`` is pressed or when a
    frame can no longer be read.

    Args:
        option: Not used by this function.
    """
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    tf.reset_default_graph()
    draw_multi = PersonDraw()
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    fps_time = 0

    cap = cv2.VideoCapture("http://192.168.43.31:8081")
    cap_user = cv2.VideoCapture('/dev/video0')
    # NOTE(review): the network stream is immediately replaced by the local
    # camera, so both "reference" and "user" frames come from /dev/video0.
    # Release the discarded capture instead of leaking it.
    cap.release()
    cap = cap_user

    # NOTE(review): `i` is never incremented, so the `i % 25` test below is
    # always true and every frame is processed — confirm intended sampling.
    i = 0
    try:
        while True:
            ret, orig_frame = cap.read()
            ret2, orig_frame_user = cap_user.read()
            # A failed read yields no image; stop instead of crashing in
            # cv2.resize.
            if not ret or not ret2:
                break
            if i % 25 == 0:
                frame = cv2.resize(orig_frame, (0, 0), fx=0.50, fy=0.50)
                user_frame = cv2.resize(orig_frame_user, (0, 0),
                                        fx=0.50, fy=0.50)
                co1 = run_predict(frame, sess, outputs, inputs, cfg, dataset,
                                  sm, draw_multi)
                print("CO1            ", co1)
                user_co1 = run_predict(user_frame, sess, outputs, inputs, cfg,
                                       dataset, sm, draw_multi)
                print("USER_CO1            ", user_co1)
                print("CO1            ", co1)
                # k stays None when the comparison cannot be computed.
                k = None
                try:
                    slope_reqd, slope_user = slope_calc(co1, user_co1)
                    k, _ = compare_images(slope_reqd, slope_user, 0.75)
                except IndexError:
                    print("Except condition")
                vibrate(k)
                # Scale back up to the original size for display.
                frame = cv2.resize(frame, (0, 0), fx=2.0, fy=2.0)
                user_frame = cv2.resize(user_frame, (0, 0), fx=2.0, fy=2.0)
                cv2.putText(user_frame,
                            "FPS: %f" % (1.0 / (time.time() - fps_time)),
                            (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (0, 255, 0), 2)
                cv2.imshow('user_frame', user_frame)
                cv2.imshow('frame', frame)
                fps_time = time.time()
                if cv2.waitKey(10) == ord('q'):
                    break
    finally:
        # Always free the capture devices and windows, even on error.
        cap.release()
        cap_user.release()
        cv2.destroyAllWindows()
示例#5
0
文件: test.py 项目: nichtsen/pose-reg
def test_net(visualise, cache_scoremaps):
    """Run single-person pose prediction over the dataset and save results.

    For every image the network outputs are turned into a pose with
    ``argmax_pose_predict``; the pose, with its first two columns divided by
    ``cfg.global_scale``, is stored and finally written to
    ``predictions.mat`` under the key ``joints``.

    Args:
        visualise: When truthy, show the heat maps for each image and wait
            for a button press.
        cache_scoremaps: When truthy, write the score maps (and, if
            ``cfg.location_refinement`` is set, the location refinement
            predictions) as ``.mat`` files into ``cfg.scoremap_dir``.
    """
    logging.basicConfig(level=logging.INFO)

    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(False)
    dataset.set_test_mode(True)

    sess, inputs, outputs = setup_pose_prediction(cfg)

    try:
        if cache_scoremaps:
            out_dir = cfg.scoremap_dir
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

        num_images = dataset.num_images
        # `np.object` was removed from NumPy; the builtin `object` is the
        # equivalent dtype.
        predictions = np.zeros((num_images, ), dtype=object)

        for k in range(num_images):
            print('processing image {}/{}'.format(k, num_images - 1))

            batch = dataset.next_batch()

            outputs_np = sess.run(outputs,
                                  feed_dict={inputs: batch[Batch.inputs]})

            # The pairwise output is not needed for argmax prediction.
            scmap, locref, _ = extract_cnn_output(outputs_np, cfg)

            pose = argmax_pose_predict(scmap, locref, cfg.stride)

            pose_refscale = np.copy(pose)
            pose_refscale[:, 0:2] /= cfg.global_scale
            predictions[k] = pose_refscale

            if visualise:
                img = np.squeeze(batch[Batch.inputs]).astype('uint8')
                visualize.show_heatmaps(cfg, img, scmap, pose)
                visualize.waitforbuttonpress()

            if cache_scoremaps:
                base = os.path.basename(batch[Batch.data_item].im_path)
                raw_name = os.path.splitext(base)[0]
                out_fn = os.path.join(out_dir, raw_name + '.mat')
                scipy.io.savemat(out_fn,
                                 mdict={'scoremaps': scmap.astype('float32')})

                out_fn = os.path.join(out_dir, raw_name + '_locreg' + '.mat')
                if cfg.location_refinement:
                    scipy.io.savemat(
                        out_fn,
                        mdict={'locreg_pred': locref.astype('float32')})

        scipy.io.savemat('predictions.mat', mdict={'joints': predictions})
    finally:
        # Release the session even if prediction or saving fails.
        sess.close()
示例#6
0
def test_net(visualise, cache_scoremaps):
    """Predict a pose for every dataset image and store all predictions.

    Each pose comes from ``argmax_pose_predict``; a copy whose first two
    columns are divided by ``cfg.global_scale`` is collected and the whole
    collection is saved to ``predictions.mat`` under the key ``joints``.

    Args:
        visualise: When truthy, display heat maps per image and wait for a
            button press before continuing.
        cache_scoremaps: When truthy, save score maps (and location
            refinement predictions when ``cfg.location_refinement`` is set)
            as ``.mat`` files in ``cfg.scoremap_dir``.
    """
    logging.basicConfig(level=logging.INFO)

    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(False)
    dataset.set_test_mode(True)

    sess, inputs, outputs = setup_pose_prediction(cfg)

    try:
        if cache_scoremaps:
            out_dir = cfg.scoremap_dir
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

        num_images = dataset.num_images
        # Use the builtin `object`: the `np.object` alias no longer exists in
        # current NumPy releases.
        predictions = np.zeros((num_images,), dtype=object)

        for k in range(num_images):
            print('processing image {}/{}'.format(k, num_images-1))

            batch = dataset.next_batch()

            outputs_np = sess.run(outputs, feed_dict={inputs: batch[Batch.inputs]})

            # Third output (pairwise differences) is unused here.
            scmap, locref, _ = extract_cnn_output(outputs_np, cfg)

            pose = argmax_pose_predict(scmap, locref, cfg.stride)

            pose_refscale = np.copy(pose)
            pose_refscale[:, 0:2] /= cfg.global_scale
            predictions[k] = pose_refscale

            if visualise:
                img = np.squeeze(batch[Batch.inputs]).astype('uint8')
                visualize.show_heatmaps(cfg, img, scmap, pose)
                visualize.waitforbuttonpress()

            if cache_scoremaps:
                base = os.path.basename(batch[Batch.data_item].im_path)
                raw_name = os.path.splitext(base)[0]
                out_fn = os.path.join(out_dir, raw_name + '.mat')
                scipy.io.savemat(out_fn, mdict={'scoremaps': scmap.astype('float32')})

                out_fn = os.path.join(out_dir, raw_name + '_locreg' + '.mat')
                if cfg.location_refinement:
                    scipy.io.savemat(out_fn, mdict={'locreg_pred': locref.astype('float32')})

        scipy.io.savemat('predictions.mat', mdict={'joints': predictions})
    finally:
        # Make sure the session is closed on both success and failure.
        sess.close()
def main(option):
    """Compare poses between a reference clip and a user clip, frame by frame.

    Loads the multi-person configuration, sets up the pose predictor, then
    reads frames from ``msgifs/icon4.gif`` and ``user.mp4`` in lockstep, runs
    ``run_predict`` on half-size copies, and feeds the slopes of both results
    to ``compare_images``.  The loop ends when ``q`` is pressed or when
    either source runs out of frames.

    Args:
        option: Not used by this function.
    """
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    tf.reset_default_graph()
    draw_multi = PersonDraw()
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    fps_time = 0

    cap = cv2.VideoCapture('msgifs/icon4.gif')
    cap_user = cv2.VideoCapture('user.mp4')

    # NOTE(review): `i` is never incremented, so the `i % 25` test below is
    # always true and every frame is processed — confirm intended sampling.
    i = 0
    try:
        while True:
            ret, orig_frame = cap.read()
            ret2, orig_frame_user = cap_user.read()
            # Stop cleanly at end of stream; a failed read yields no image
            # and cv2.resize would raise on it.
            if not ret or not ret2:
                break
            if i % 25 == 0:
                frame = cv2.resize(orig_frame, (0, 0), fx=0.50, fy=0.50)
                user_frame = cv2.resize(orig_frame_user, (0, 0),
                                        fx=0.50, fy=0.50)
                co1 = run_predict(frame, sess, inputs, outputs, cfg, dataset,
                                  sm, draw_multi)
                user_co1 = run_predict(user_frame, sess, inputs, outputs, cfg,
                                       dataset, sm, draw_multi)
                try:
                    slope_reqd = slope_calc(co1)
                    slope_user = slope_calc(user_co1)
                    compare_images(slope_reqd, slope_user, 0.1)
                except IndexError:
                    # Deliberate best effort: skip the comparison for this
                    # frame when the slope computation hits a missing index.
                    pass
                cv2.putText(user_frame,
                            "FPS: %f" % (1.0 / (time.time() - fps_time)),
                            (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (0, 255, 0), 2)

                cv2.imshow('user_frame', user_frame)
                cv2.imshow('frame', frame)
                fps_time = time.time()
                if cv2.waitKey(10) == ord('q'):
                    break
    finally:
        # Always free both sources and the windows, even on error.
        cap.release()
        cap_user.release()
        cv2.destroyAllWindows()
示例#8
0
def train():
    """Run the training loop for the pose network.

    Sets up the preloading queue, the losses and their scalar summaries,
    restores the ``resnet_v1`` variables from ``cfg.init_weights`` and runs
    ``int(cfg.multi_step[-1][1]) + 1`` iterations.  Every
    ``cfg.display_iters`` iterations the mean loss since the last report is
    logged; every ``cfg.save_iters`` iterations, and at the last iteration,
    a snapshot is saved under ``cfg.snapshot_prefix``.
    """
    setup_logging()

    cfg = load_config()
    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()

    # `restorer` only covers the "resnet_v1" variables; `saver` writes the
    # training snapshots and keeps at most 5 of them.
    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
    restorer = tf.train.Saver(variables_to_restore)
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0
    # Count of losses accumulated since the last report; used as the divisor
    # so the first report (a single iteration) is a true average.
    accumulated = 0
    lr_gen = LearningRate(cfg)

    for it in range(max_iter+1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],
                                          feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        accumulated += 1
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:
            average_loss = cum_loss / accumulated
            cum_loss = 0.0
            accumulated = 0
            logging.info("iteration: {} loss: {} lr: {}"
                         .format(it, "{0:.4f}".format(average_loss), current_lr))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    # NOTE(review): session is closed before the loader thread is stopped;
    # order kept from the original — confirm before changing it.
    sess.close()
    coord.request_stop()
    coord.join([thread])
def test_net(visualise, cache_scoremaps, development):
    """Run multi-person pose estimation over the dataset.

    Score maps either come from the network or, when the configuration has a
    ``cached_scoremaps`` entry, from ``.mat`` files named after each image's
    ``coco_id``.  When ``cfg.use_gt_segm`` is set, predictions are collected
    and written to ``predictions_with_segm.json``.

    Args:
        visualise: When truthy, show intermediate/final visualisations and
            wait for a button press after each.
        cache_scoremaps: When truthy (and not reading from cache), write the
            network outputs to ``cfg.scoremap_dir`` and skip the rest of the
            per-image processing.
        development: When truthy, process at most 10 images.
    """
    logging.basicConfig(level=logging.INFO)

    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(False)

    sm = SpatialModel(cfg)
    sm.load()

    draw_multi = PersonDraw()

    # A session only exists when the network is actually evaluated; keep an
    # explicit None so the cleanup at the end works in cached mode too.
    sess = None
    from_cache = "cached_scoremaps" in cfg
    if not from_cache:
        sess, inputs, outputs = setup_pose_prediction(cfg)

    if cache_scoremaps:
        out_dir = cfg.scoremap_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    pairwise_stats = dataset.pairwise_stats
    num_images = dataset.num_images if not development else min(
        10, dataset.num_images)
    coco_results = []

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images - 1))

        batch = dataset.next_batch()

        cache_name = "{}.mat".format(batch[Batch.data_item].coco_id)

        if not from_cache:
            outputs_np = sess.run(outputs,
                                  feed_dict={inputs: batch[Batch.inputs]})
            scmap, locref, pairwise_diff = extract_cnn_output(
                outputs_np, cfg, pairwise_stats)

            if cache_scoremaps:
                if visualise:
                    # In visualise mode nothing is written to the cache.
                    img = np.squeeze(batch[Batch.inputs]).astype('uint8')
                    pose = argmax_pose_predict(scmap, locref, cfg.stride)
                    arrows = argmax_arrows_predict(scmap, locref,
                                                   pairwise_diff, cfg.stride)
                    visualize.show_arrows(cfg, img, pose, arrows)
                    visualize.waitforbuttonpress()
                    continue

                out_fn = os.path.join(out_dir, cache_name)
                cached = {
                    'scoremaps': scmap.astype('float32'),
                    'locreg_pred': locref.astype('float32'),
                    'pairwise_diff': pairwise_diff.astype('float32')
                }
                scipy.io.savemat(out_fn, mdict=cached)
                continue
        else:
            full_fn = os.path.join(cfg.cached_scoremaps, cache_name)
            mlab = scipy.io.loadmat(full_fn)
            scmap = mlab["scoremaps"]
            locref = mlab["locreg_pred"]
            pairwise_diff = mlab["pairwise_diff"]

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(
            sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array,
                                                     pos_array)

        if visualise:
            img = np.squeeze(batch[Batch.inputs]).astype('uint8')

            # Draw the grouped per-person configurations on a copy.
            visim_multi = img.copy()
            draw_multi.draw(visim_multi, dataset, person_conf_multi)

            plt.imshow(visim_multi)
            plt.show()
            visualize.waitforbuttonpress()

        if cfg.use_gt_segm:
            coco_img_results = pose_predict_with_gt_segm(
                scmap, locref, cfg.stride, batch[Batch.data_item].gt_segm,
                batch[Batch.data_item].coco_id)
            coco_results += coco_img_results
            if len(coco_img_results):
                dataset.visualize_coco(coco_img_results,
                                       batch[Batch.data_item].visibilities)

    if cfg.use_gt_segm:
        with open('predictions_with_segm.json', 'w') as outfile:
            json.dump(coco_results, outfile)

    # No session was created when running from cached score maps.
    if sess is not None:
        sess.close()
from config import load_config
from dataset.factory import create as create_dataset
from nnet import predict
from util import visualize
from dataset.pose_dataset import data_to_input

from multiperson.detections import extract_detections
from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
from multiperson.visualize import PersonDraw, visualize_detections

import matplotlib.pyplot as plt


# Script setup: load the multi-person demo configuration and build the
# dataset, spatial model, drawer and CNN predictor used further below.
cfg = load_config("demo/pose_cfg_multi.yaml")

dataset = create_dataset(cfg)

sm = SpatialModel(cfg)
sm.load()

draw_multi = PersonDraw()

# Load and setup CNN part detector
sess, inputs, outputs = predict.setup_pose_prediction(cfg)

# Read image from file
# NOTE(review): `sys` and `imread` are not imported in the visible part of
# this script — presumably imported above this chunk; confirm.
file_name_ext = sys.argv[1] ## first CLI argument, e.g. test_single_03.png
file_name = file_name_ext.split('.')[0] ## text before the first '.', e.g. test_single_03
file_name_input = 'testset/' + file_name_ext
image = imread(file_name_input, mode='RGB')
def test_net(visualise, cache_scoremaps, development):
    """Evaluate the multi-person pipeline on the dataset.

    Network outputs are computed per image unless the configuration contains
    ``cached_scoremaps``, in which case they are loaded from ``.mat`` files
    named ``<coco_id>.mat``.  With ``cfg.use_gt_segm`` set, the collected
    results are dumped to ``predictions_with_segm.json``.

    Args:
        visualise: When truthy, display visualisations and wait for a button
            press after each one.
        cache_scoremaps: When truthy (and not reading from cache), save the
            network outputs into ``cfg.scoremap_dir`` and skip detection for
            that image.
        development: When truthy, limit the run to at most 10 images.
    """
    logging.basicConfig(level=logging.INFO)

    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(False)

    sm = SpatialModel(cfg)
    sm.load()

    draw_multi = PersonDraw()

    # `sess` stays None in cached mode so the final cleanup cannot hit an
    # unbound name.
    sess = None
    from_cache = "cached_scoremaps" in cfg
    if not from_cache:
        sess, inputs, outputs = setup_pose_prediction(cfg)

    if cache_scoremaps:
        out_dir = cfg.scoremap_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    pairwise_stats = dataset.pairwise_stats
    num_images = dataset.num_images if not development else min(10, dataset.num_images)
    coco_results = []

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images-1))

        batch = dataset.next_batch()

        cache_name = "{}.mat".format(batch[Batch.data_item].coco_id)

        if not from_cache:
            outputs_np = sess.run(outputs, feed_dict={inputs: batch[Batch.inputs]})
            scmap, locref, pairwise_diff = extract_cnn_output(outputs_np, cfg, pairwise_stats)

            if cache_scoremaps:
                if visualise:
                    # Visualise only; nothing is cached for this image.
                    img = np.squeeze(batch[Batch.inputs]).astype('uint8')
                    pose = argmax_pose_predict(scmap, locref, cfg.stride)
                    arrows = argmax_arrows_predict(scmap, locref, pairwise_diff, cfg.stride)
                    visualize.show_arrows(cfg, img, pose, arrows)
                    visualize.waitforbuttonpress()
                    continue

                out_fn = os.path.join(out_dir, cache_name)
                # Named `mat_contents` to avoid shadowing the builtin `dict`.
                mat_contents = {'scoremaps': scmap.astype('float32'),
                                'locreg_pred': locref.astype('float32'),
                                'pairwise_diff': pairwise_diff.astype('float32')}
                scipy.io.savemat(out_fn, mdict=mat_contents)
                continue
        else:
            full_fn = os.path.join(cfg.cached_scoremaps, cache_name)
            mlab = scipy.io.loadmat(full_fn)
            scmap = mlab["scoremaps"]
            locref = mlab["locreg_pred"]
            pairwise_diff = mlab["pairwise_diff"]

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        if visualise:
            img = np.squeeze(batch[Batch.inputs]).astype('uint8')

            # Draw the grouped per-person configurations on a copy.
            visim_multi = img.copy()
            draw_multi.draw(visim_multi, dataset, person_conf_multi)

            plt.imshow(visim_multi)
            plt.show()
            visualize.waitforbuttonpress()

        if cfg.use_gt_segm:
            coco_img_results = pose_predict_with_gt_segm(scmap, locref, cfg.stride, batch[Batch.data_item].gt_segm,
                                                      batch[Batch.data_item].coco_id)
            coco_results += coco_img_results
            if len(coco_img_results):
                dataset.visualize_coco(coco_img_results, batch[Batch.data_item].visibilities)

    if cfg.use_gt_segm:
        with open('predictions_with_segm.json', 'w') as outfile:
            json.dump(coco_results, outfile)

    # Only close the session if one was created (not in cached mode).
    if sess is not None:
        sess.close()
示例#12
0
def train():
    """Train the pose network end to end.

    Builds the input queue, the network and its losses, restores the
    ``resnet_v1`` variables from ``cfg.init_weights`` and runs
    ``int(cfg.multi_step[-1][1]) + 1`` training iterations, logging the mean
    loss every ``cfg.display_iters`` iterations and saving a snapshot every
    ``cfg.save_iters`` iterations and at the final iteration.
    """
    # Set up logging.
    setup_logging()

    # Load the training configuration.
    cfg = load_config()
    # Create the dataset instance.
    dataset = create_dataset(cfg)

    # Build the batch specification from the configuration.
    batch_spec = get_batch_spec(cfg)
    # From the batch spec, create the batch tensors, the enqueue op and the
    # placeholders that feed it.
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    # Build the network and its loss ops; `losses` holds several named
    # losses, including the total.
    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    # Register one scalar summary per loss, then merge them into one op.
    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()

    # Collect the variables under "resnet_v1".
    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])

    # Saver used to restore the resnet_v1 weights.
    restorer = tf.train.Saver(variables_to_restore)
    # Saver used to write training snapshots (keeps at most 5).
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    # Start the thread that reads data and fills the queue.
    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    # Open the summary writer for training logs.
    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    # Get the learning-rate placeholder/op and the training op.
    learning_rate, train_op = get_optimizer(total_loss, cfg)

    # Initialise global and local variables.
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    # Maximum iteration number comes from the configuration.
    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0
    # Number of losses accumulated since the last report.  Used as the
    # divisor so the first report, which covers one iteration only, is not
    # divided by display_iters.
    reported_span = 0

    # Learning-rate schedule generator.
    lr_gen = LearningRate(cfg)

    for it in range(max_iter + 1):
        # Learning rate for the current iteration.
        current_lr = lr_gen.get_lr(it)
        # Run one training step.
        [_, loss_val,
         summary] = sess.run([train_op, total_loss, merged_summaries],
                             feed_dict={learning_rate: current_lr})
        # Accumulate the loss.
        cum_loss += loss_val
        reported_span += 1
        # Record the summary for this iteration.
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:  # report every display_iters iterations
            average_loss = cum_loss / reported_span  # mean since last report
            cum_loss = 0.0
            reported_span = 0
            logging.info("iteration: {} loss: {} lr: {}".format(
                it, "{0:.4f}".format(average_loss), current_lr))

        # Save snapshot every cfg.save_iters iterations and at the end.
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    # NOTE(review): session is closed before the loader thread is stopped;
    # order kept from the original — confirm before changing it.
    sess.close()
    # Ask the data-loading thread to stop.
    coord.request_stop()
    # Wait for the data-loading thread to finish.
    coord.join([thread])
def video2posevideo(video_name):
    """Draw multi-person pose estimates on every frame of a video.

    Each frame is run through the pose network and the multi-person grouping;
    limbs, key points and a box are drawn per detected person, together with
    per-frame counters, and the annotated frames are written to
    ``"testset/" + video_name + "_pose.mp4"``.

    Args:
        video_name: Passed to ``read_video`` and used to build the output
            file name.
    """
    # time.clock() no longer exists in current Python; perf_counter() is the
    # monotonic replacement for measuring elapsed time.
    time_start = time.perf_counter()

    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    # The unused `from scipy.misc import imread, imsave` was dropped: those
    # functions are gone from current SciPy and the import would fail.
    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    import matplotlib.pyplot as plt

    from PIL import Image, ImageDraw, ImageFont
    font = ImageFont.truetype("./font/NotoSans-Bold.ttf", 24)

    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")

    dataset = create_dataset(cfg)

    sm = SpatialModel(cfg)
    sm.load()

    draw_multi = PersonDraw()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    video = read_video(video_name)

    # duration is in seconds, fps in frames per second
    video_frame_number = int(video.duration * video.fps)
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10))  # e.g. 720 -> 3

    pose_frame_list = []

    point_r = 3     # radius of drawn key points
    point_min = 10  # a person with at least this many points counts "by point"
    part_min = 3    # a person with at least this many limbs counts "by line"
    point_num = 17  # key points per person

    # Key-point index triples drawn as limbs, in drawing order:
    # left arm, right arm, left leg, right leg.
    limbs = ((5, 7, 9), (6, 8, 10), (11, 13, 15), (12, 14, 16))

    def ellipse_set(person_conf_multi, people_i, point_i):
        """Return the bounding box of the dot drawn for one key point."""
        x = person_conf_multi[people_i][point_i][0]
        y = person_conf_multi[people_i][point_i][1]
        return (x - point_r, y - point_r, x + point_r, y + point_r)

    def line_set(person_conf_multi, people_i, point_i, point_j):
        """Return the end points of the segment joining two key points."""
        return (person_conf_multi[people_i][point_i][0],
                person_conf_multi[people_i][point_i][1],
                person_conf_multi[people_i][point_j][0],
                person_conf_multi[people_i][point_j][1])

    def draw_ellipse_and_line(draw, person_conf_multi, people_i, a, b, c, point_color):
        """Draw three key points and the two segments a-b and b-c."""
        for idx in (a, b, c):
            draw.ellipse(ellipse_set(person_conf_multi, people_i, idx), fill=point_color)
        draw.line(line_set(person_conf_multi, people_i, a, b), fill=point_color, width=5)
        draw.line(line_set(person_conf_multi, people_i, b, c), fill=point_color, width=5)

    def has_point(person_conf_multi, people_i, point_i):
        """Tell whether a key point is present (coordinates do not sum to 0)."""
        return (person_conf_multi[people_i][point_i][0]
                + person_conf_multi[people_i][point_i][1]) != 0

    for i in range(0, video_frame_number):
        image = video.get_frame(i/video.fps)

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        # Draw on a PIL copy of the frame.
        image_img = Image.fromarray(image)
        draw = ImageDraw.Draw(image_img)

        people_real_num = 0
        people_part_num = 0

        # Two coordinates per key point, point_num key points per person.
        people_num = person_conf_multi.size // (point_num * 2)
        print('people_num: ' + str(people_num))

        for people_i in range(0, people_num):
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)

            # Indices of the key points present for this person.
            point_list = [point_i for point_i in range(0, point_num)
                          if has_point(person_conf_multi, people_i, point_i)]
            point_count = len(point_list)
            part_count = 0  # limbs drawn for this person

            # Draw each limb whose three key points are all present.
            for a, b, c in limbs:
                if (a in point_list) and (b in point_list) and (c in point_list):
                    draw_ellipse_and_line(draw, person_conf_multi, people_i, a, b, c, point_color)
                    part_count = part_count + 1

            if point_count >= point_min:
                people_real_num = people_real_num + 1
                # Coordinates of the present points, used for the box.
                people_x = []
                people_y = []
                for point_i in point_list:
                    draw.ellipse(ellipse_set(person_conf_multi, people_i, point_i), fill=point_color)
                    people_x.append(person_conf_multi[people_i][point_i][0])
                    people_y.append(person_conf_multi[people_i][point_i][1])
                # Draw the rectangle enclosing this person's points.
                # NOTE(review): fill= paints the whole box solid and
                # outline=5 is a colour value, not a width — confirm intent.
                draw.rectangle([min(people_x), min(people_y), max(people_x), max(people_y)], fill=point_color, outline=5)

            if part_count >= part_min:
                people_part_num = people_part_num + 1

        draw.text((0, 0), 'People(by point): ' + str(people_real_num) + ' (threshold = ' + str(point_min) + ')', (0,0,0), font=font)
        draw.text((0, 32), 'People(by line): ' + str(people_part_num) + ' (threshold = ' + str(part_min) + ')', (0,0,0), font=font)
        draw.text((0, 64), 'Frame: ' + str(i) + '/' + str(video_frame_number), (0,0,0), font=font)
        draw.text((0, 96), 'Total time required: ' + str(round(time.perf_counter() - time_start, 1)) + 'sec', (0,0,0))

        print('people_real_num: ' + str(people_real_num))
        print('people_part_num: ' + str(people_part_num))
        print('frame: ' + str(i))

        image_img_numpy = np.asarray(image_img)

        pose_frame_list.append(image_img_numpy)

    video_pose = ImageSequenceClip(pose_frame_list, fps=video.fps)
    video_pose.write_videofile("testset/" + video_name + "_pose.mp4", fps=video.fps)

    print("Time(s): " + str(time.perf_counter() - time_start))
示例#14
0
def train_gan(arguments):
    """Train a generator/discriminator pair as configured by ``arguments``.

    Sets up the result directory, file logging, Tensorboard and the random
    seeds, builds both models with their optimizers, then trains in
    fixed-length intervals. The ``ON_NTH_ITERATION`` callbacks are run once
    before training starts and again after every interval.

    Args:
        arguments: Mapping with the run configuration. Keys read here:
            ``random_seed``, ``mode`` (``'dcgan'``, ``'wgan-wp'`` or
            ``'wgan-noise-adversarial'``), ``generator_model_args``
            (including ``model_constructor_args['nz']``),
            ``discriminator_model_args``, ``generator_optimizer_args``,
            ``discriminator_optimizer_args``, ``dataset_args``,
            ``train_data_args`` (including ``batch_size``),
            ``val_data_args``, ``num_iterations`` (outside debug mode) and,
            for the noise-adversarial mode, ``contamination_loss_weight``.

    Raises:
        ValueError: When a training step is attempted with a ``mode`` that
            is not one of the three supported values.
    """
    # Setup result directory and enable logging to file in it
    outdir = make_results_dir(arguments)
    logger.init(outdir, logging.INFO)
    logger.info('Arguments:\n{}'.format(pformat(arguments)))

    # Initialize Tensorboard
    tensorboard_writer = initialize_tensorboard(outdir)

    # Set random seed throughout python, pytorch and numpy
    seed = arguments['random_seed']
    logger.info('Using Random Seed value as: %d' % seed)
    torch.manual_seed(seed)  # Set for pytorch, used for cuda as well.
    random.seed(seed)  # Set for python
    np.random.seed(seed)  # Set for numpy

    # Set device - cpu or gpu
    # NOTE(review): `opt` is not defined in this function; it has to exist at
    # module level for the CUDA branch to work - confirm.
    device = torch.device(
        f"cuda:{opt.gpu}" if torch.cuda.is_available() else "cpu")
    logger.info(f'Using device - {device}')

    # Load Model with weights(if available)
    G: torch.nn.Module = get_model(
        arguments.get('generator_model_args')).to(device)
    D: torch.nn.Module = get_model(
        arguments.get('discriminator_model_args')).to(device)

    mode = arguments['mode']
    if mode == 'dcgan':
        G.apply(weights_init)
        D.apply(weights_init)

    # Create optimizer
    G_optimizer = create_optimizer(G.parameters(),
                                   arguments['generator_optimizer_args'])
    D_optimizer = create_optimizer(D.parameters(),
                                   arguments['discriminator_optimizer_args'])

    # Create Loss (only passed to the DCGAN training step below)
    loss = torch.nn.BCELoss().to(device=device)  # GAN

    # Load parameters for the Dataset
    dataset: BaseDataset = create_dataset(arguments['dataset_args'],
                                          arguments['train_data_args'],
                                          arguments['val_data_args'])

    # Generate all callbacks
    callbacks: List[Callbacks] = generate_callbacks(arguments, dataset, device,
                                                    outdir)

    # """ Create loss function """
    # criterion = create_loss(arguments['loss_args'])

    # Debug the inputs to model and save graph to tensorboard
    dataset.debug()

    # Only One model is allowed
    # G_dummy_input = torch.rand(size=(1, arguments['generator_model_args']['model_constructor_args']['latent_dim']))
    # D_dummy_input = (torch.rand(1,
    #                           arguments['dataset_args']['name'].value['channels'],
    #                           32, 32  # *arguments['dataset_args']['name'].value['image_size']  # ToDo Fix this
    #                           ))
    # tensorboard_writer.save_graph('Generator', G, G_dummy_input.to(device))
    # tensorboard_writer.save_graph('Discriminator', D, D_dummy_input.to(device))
    logger.info(G)
    logger.info(D)

    def reset_grad():
        """Clear the accumulated gradients of both networks."""
        G.zero_grad()
        D.zero_grad()

    batch_size = arguments['train_data_args']['batch_size']
    z_dim = arguments['generator_model_args']['model_constructor_args']['nz']

    generator = infinite_train_gen(dataset.train_dataloader)
    interval_length = 10 if is_debug_mode() else 400
    num_intervals = 1 if is_debug_mode() else int(arguments['num_iterations'] /
                                                  interval_length)

    global_step = 0

    # TO allocate memory required for the GPU during training and validation
    run_callbacks(
        callbacks,
        model=(G, D),
        optimizer=(G_optimizer,
                   D_optimizer),  # To Save optimizer dict for retraining.
        mode=CallbackMode.ON_NTH_ITERATION,
        iteration=global_step)
    reset_grad()

    # Tensorboard label of the D network: "Discriminator" for DCGAN,
    # "Critic" for every other mode.
    d_label = 'Discriminator' if mode == 'dcgan' else 'Critic'

    for it in range(num_intervals):

        logger.info(f'Interval {it + 1}/{num_intervals}')

        # Set model in train mode
        G.train()
        D.train()

        t = trange(interval_length)
        for _ in t:
            if mode == 'dcgan':
                D_loss, G_loss = train_gan_iter(D, D_optimizer, G, G_optimizer,
                                                loss, device, generator,
                                                batch_size, reset_grad, z_dim,
                                                tensorboard_writer,
                                                global_step)
            elif mode == 'wgan-wp':
                D_loss, G_loss = train_wgan_iter(D, D_optimizer, G,
                                                 G_optimizer, device,
                                                 generator, batch_size,
                                                 reset_grad, z_dim,
                                                 tensorboard_writer,
                                                 global_step)
            elif mode == 'wgan-noise-adversarial':
                D_loss, G_loss = train_noisy_wgan_iter(
                    D,
                    D_optimizer,
                    G,
                    G_optimizer,
                    device,
                    generator,
                    batch_size,
                    reset_grad,
                    z_dim,
                    tensorboard_writer,
                    global_step,
                    contamination_loss_weight=arguments[
                        'contamination_loss_weight'])
            else:
                # Previously an unknown mode fell through and crashed below
                # with an UnboundLocalError on D_loss; fail with a clear
                # message instead.
                raise ValueError(
                    f"Unsupported GAN mode {mode!r}; expected 'dcgan', "
                    "'wgan-wp' or 'wgan-noise-adversarial'")

            # Convert each loss to a Python float once per step and reuse it
            # for the progress bar, Tensorboard and the interval summary.
            d_loss_value = D_loss.data.cpu().item()
            g_loss_value = G_loss.data.cpu().item()

            # Log D_Loss and G_Loss in progress_bar
            t.set_postfix(D_Loss=d_loss_value, G_Loss=g_loss_value)

            # Save Loss In Tensorboard
            tensorboard_writer.save_scalars(
                f'{mode.upper()}_Loss', {
                    d_label: d_loss_value,
                    'Generator': g_loss_value
                }, global_step)
            global_step += 1

        # Losses of the last step of this interval.
        print(
            f'Discriminator Loss: {d_loss_value}, Generator Loss: {g_loss_value}'
        )

        run_callbacks(
            callbacks,
            model=(G, D),
            optimizer=(G_optimizer,
                       D_optimizer),  # To Save optimizer dict for retraining.
            mode=CallbackMode.ON_NTH_ITERATION,
            iteration=global_step)
        reset_grad()
示例#15
0
def train():
    """Train the pose network, resuming from the newest local snapshot.

    The current working directory is scanned for files whose name contains
    ``"index"``. If any are found, the one with the highest iteration number
    (the integer after the first ``-`` in the name) becomes
    ``cfg['init_weights']`` and training resumes from that iteration;
    otherwise training starts at iteration 0 and only the ``resnet_v1``
    variables are restored from ``cfg.init_weights``.

    Snapshots are written every ``cfg.save_iters`` iterations and at the
    final iteration; the running average loss is logged every
    ``cfg.display_iters`` iterations.
    """
    setup_logging()

    cfg = load_config()

    # load newest snapshot
    snapshots = [fn.split('.')[0] for fn in os.listdir(os.getcwd()) if "index" in fn]
    if len(snapshots) > 0:
        iters = np.array([int(fn.split('-')[1]) for fn in snapshots])
        cfg['init_weights'] = snapshots[iters.argmax()]
        start = iters.max()
    else:
        start = 0

    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    # One scalar summary per loss term.
    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()

    if start == 0:
        # Fresh run: only the backbone variables are loaded from init_weights.
        variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
        restorer = tf.train.Saver(variables_to_restore)
    else:
        # Resumed run: restore every variable from the snapshot.
        restorer = tf.train.Saver()
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0
    # Number of losses accumulated since the last report. Dividing by this
    # instead of display_iters keeps the first report of a (resumed) run
    # correct, where fewer than display_iters steps have been accumulated.
    loss_count = 0
    lr_gen = LearningRate(cfg, start)

    startTime = time.time()

    for it in range(start, max_iter+1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],
                                          feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        loss_count += 1
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:
            average_loss = cum_loss / loss_count
            cum_loss = 0.0
            loss_count = 0
            elapsed = timedelta(seconds=(time.time()-startTime))
            logging.info("iteration: {} loss: {} lr: {} time: {}"
                         .format(it, "{0:.4f}".format(average_loss), current_lr, elapsed))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != start) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    # NOTE(review): the session is closed before the preloading thread is
    # asked to stop; presumably so a blocked enqueue gets cancelled - confirm
    # before reordering.
    sess.close()
    coord.request_stop()
    coord.join([thread])
def video2poseframe(video_name):
    """Run multi-person pose estimation on every frame of a video.

    Each frame is passed through the CNN part detector and the multicut
    spatial model. For every person with more than five detected joints the
    detected joints are drawn as filled circles, and the annotated frame is
    saved as ``testset/<video_name>/frame_pose_<index>.jpg``.

    Args:
        video_name: Name handed to ``read_video`` and also used as the name
            of the output directory under ``testset/``.
    """
    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    # The unused `from scipy.misc import imread, imsave` was dropped: those
    # functions no longer exist in current SciPy, so the import alone made
    # this function fail.

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    import matplotlib.pyplot as plt

    from PIL import Image, ImageDraw

    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")

    dataset = create_dataset(cfg)

    sm = SpatialModel(cfg)
    sm.load()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    ################

    video = read_video(video_name)

    video_frame_number = int(video.duration * video.fps) ## duration: second / fps: frame per second
    # Digits needed for the largest frame index (ex. 720 frames -> 3).
    # Counting characters avoids the float rounding of math.log and the
    # ValueError it raises for a video with zero frames.
    video_frame_ciphers = len(str(max(video_frame_number - 1, 0)))

    output_dir = 'testset/' + video_name
    os.makedirs(output_dir, exist_ok=True)

    point_num = 17  # joints per person
    point_r = 5  # radius of points
    point_min = 5  # a person needs MORE than this many joints to be counted

    for i in range(0, video_frame_number):
        image = video.get_frame(i/video.fps)

        ######################

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        print('person_conf_multi: ')
        print(type(person_conf_multi))
        print(person_conf_multi)

        # Add library to save image
        image_img = Image.fromarray(image)

        # Save image with points of pose
        draw = ImageDraw.Draw(image_img)

        print('person_conf_multi.size: ')
        print(person_conf_multi.size)
        # Every person contributes point_num (x, y) pairs.
        people_num = int(person_conf_multi.size / (point_num * 2))
        print('people_num: ')
        print(people_num)

        people_real_num = 0
        for people_i in range(0, people_num):
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)

            # If coordinates of point is (0, 0) == meaningless data
            joints = person_conf_multi[people_i]
            detected = [(joints[point_i][0], joints[point_i][1])
                        for point_i in range(0, point_num)
                        if joints[point_i][0] + joints[point_i][1] != 0]

            # If there are more than 5 point in person, we define he/she is REAL PERSON
            if len(detected) > point_min:
                people_real_num = people_real_num + 1
                # Only detected joints are drawn; previously the meaningless
                # (0, 0) entries were painted in the image corner as well.
                for x, y in detected:
                    draw.ellipse((x - point_r, y - point_r, x + point_r, y + point_r), fill=point_color)

        print('people_real_num: ')
        print(people_real_num)

        video_name_result = output_dir + '/frame_pose_' + str(i).zfill(video_frame_ciphers) + '.jpg'
        # Pillow's format name is "JPEG"; "JPG" is not a registered format
        # and makes save() fail.
        image_img.save(video_name_result, "JPEG")
示例#17
0
def test_net(visualise, cache_scoremaps):
    """Run the pose network over the whole dataset and save the predictions.

    For every image the score maps are converted to joint predictions,
    rescaled by ``cfg.global_scale`` back to the original image coordinates,
    and collected. All predictions are written to ``predictions.mat`` under
    the key ``joints``.

    Args:
        visualise: When truthy, show the heatmaps of each image and wait for
            a button press before continuing.
        cache_scoremaps: When truthy, save the score maps (and, if
            ``cfg.location_refinement`` is set, the location-refinement
            maps) of each image as ``.mat`` files in ``cfg.scoremap_dir``.
    """
    # Enable Python logging
    logging.basicConfig(level=logging.INFO)

    # Load the configuration
    cfg = load_config()
    # Build the dataset reader described by the configuration
    dataset = create_dataset(cfg)
    # No shuffling at test time
    dataset.set_shuffle(False)
    # Tell the reader there are no labels, i.e. it is in test mode
    dataset.set_test_mode(True)

    # Returns the session, the input op and the output ops
    sess, inputs, outputs = setup_pose_prediction(cfg)

    # Should the heatmaps produced during testing be saved?
    if cache_scoremaps:
        # Directory the heatmaps are saved to
        out_dir = cfg.scoremap_dir
        # Create it if it does not exist yet
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    # Number of images
    num_images = dataset.num_images
    # All predicted joint coordinates are stored here.
    # `np.object` was removed in NumPy 1.24; the builtin `object` is the
    # equivalent dtype.
    predictions = np.zeros((num_images, ), dtype=object)

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images - 1))

        # Fetch one batch
        batch = dataset.next_batch()

        # Run the prediction
        outputs_np = sess.run(outputs, feed_dict={inputs: batch[Batch.inputs]})

        # Heatmap and location-refinement map
        scmap, locref = extract_cnn_output(outputs_np, cfg)

        # Final joint coordinates. According to the original author's note
        # each row of `pose` holds [joint coordinates, confidence]:
        #   pose = [ [ pos_f8[::-1], [scmap[maxloc][joint_idx]] ] ... ]
        pose = argmax_pose_predict(scmap, locref, cfg.stride)

        pose_refscale = np.copy(pose)
        # Dividing by the scale maps the coordinates back to the unscaled
        # image. 0:2 is a half-open slice, so only columns 0 and 1 change.
        pose_refscale[:, 0:2] /= cfg.global_scale
        predictions[k] = pose_refscale

        if visualise:
            # Get the image
            img = np.squeeze(batch[Batch.inputs]).astype('uint8')
            # Show the heatmaps
            visualize.show_heatmaps(cfg, img, scmap, pose)
            # Wait for a button press
            visualize.waitforbuttonpress()

        if cache_scoremaps:
            # Save the heatmap
            base = os.path.basename(batch[Batch.data_item].im_path)
            raw_name = os.path.splitext(base)[0]
            out_fn = os.path.join(out_dir, raw_name + '.mat')
            scipy.io.savemat(out_fn,
                             mdict={'scoremaps': scmap.astype('float32')})

            # Save the location-refinement map
            out_fn = os.path.join(out_dir, raw_name + '_locreg' + '.mat')
            if cfg.location_refinement:
                scipy.io.savemat(
                    out_fn, mdict={'locreg_pred': locref.astype('float32')})

    # Save the final joint predictions
    scipy.io.savemat('predictions.mat', mdict={'joints': predictions})

    sess.close()
示例#18
0
def main():
    """Run multi-person pose estimation on frames from capture device 0.

    Each successfully read frame is downscaled to 30 %, passed through the
    CNN part detector and the multicut spatial model, and the drawn result
    is shown with matplotlib. The loop ends when a frame cannot be read.
    After the loop the elapsed wall-clock time divided by 100 is printed.
    """
    start_time=time.time()
    print("main hai")
    tf.reset_default_graph()
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    draw_multi = PersonDraw()
    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    # Frames come from capture device 0 (see cv2.VideoCapture(0) below).
    # NOTE(review): `dir` shadows the builtin and is not used afterwards; the
    # call still raises if the "stick" directory does not exist.
    dir=os.listdir("stick")
    k=0
    cap=cv2.VideoCapture(0)
    i=0
    while (cap.isOpened()):
            # NOTE(review): `i` is only reassigned inside the `k==1` branch
            # below, so as written this condition holds on every pass.
            if i%20 == 0:
                ret, orig_frame= cap.read()
                if ret==True:
                    frame = cv2.resize(orig_frame, (0, 0), fx=0.30, fy=0.30)
                    image= frame
                    # sse/mse are only read by the commented-out print at the
                    # end of the function.
                    sse=0
                    mse=0

                    image_batch = data_to_input(frame)

                    # Compute prediction with the CNN
                    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})

                    scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

                    detections = extract_detections(cfg, scmap, locref, pairwise_diff)

                    unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)

                    person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)
                    img = np.copy(image)
                    #coor = PersonDraw.draw()
                    visim_multi = img.copy()
                    co1=draw_multi.draw(visim_multi, dataset, person_conf_multi)
                    plt.imshow(visim_multi)
                    plt.show()
                    visualize.waitforbuttonpress()
                    #print("this is draw : ", co1)
                    # NOTE(review): `k` starts at 0 and is only incremented
                    # inside this branch, so as written the branch is never
                    # entered.
                    if k==1:
                        qwr = np.zeros((1920,1080,3), np.uint8)

                        # Draw the segments returned by draw_multi.draw onto
                        # the blank canvas; each co1[n] is used as a pair of
                        # end points.
                        cv2.line(qwr, co1[5][0], co1[5][1],(255,0,0),3)
                        cv2.line(qwr, co1[7][0], co1[7][1],(255,0,0),3)
                        cv2.line(qwr, co1[6][0], co1[6][1],(255,0,0),3)
                        cv2.line(qwr, co1[4][0], co1[4][1],(255,0,0),3)

                        cv2.line(qwr, co1[9][0], co1[9][1],(255,0,0),3)
                        cv2.line(qwr, co1[11][0], co1[11][1],(255,0,0),3)
                        cv2.line(qwr, co1[8][0], co1[8][1],(255,0,0),3)
                        cv2.line(qwr, co1[10][0], co1[10][1],(255,0,0),3)
                        # In[9]:
                        cv2.imshow('r',qwr)
                        # NOTE(review): qwr2 is a path string, yet it is
                        # passed to cv2.cvtColor below; looks like an image
                        # load is missing - confirm.
                        qwr2="stick/frame"+str(k)+".jpg"
                        qw1 = cv2.cvtColor(qwr, cv2.COLOR_BGR2GRAY)
                        qw2= cv2.cvtColor(qwr2, cv2.COLOR_BGR2GRAY)

                        fig = plt.figure("Images")
                        images = ("Original", qw1), ("Contrast", qw2)
                        # NOTE(review): this loop rebinds the outer `i` and
                        # `image` variables.
                        for (i, (name, image)) in enumerate(images):
                                ax = fig.add_subplot(1, 3, i + 1)
                                ax.set_title(name)
                        plt.imshow(hash(tuple(image)))
                        # compare the images
                        s,m=compare_images(qw1, qw2, "Image1 vs Image2")
                        k+=1
                        sse=s
                        mse=m

                else:
                    # No frame could be read: stop processing.
                    break
    elapsed= time.time()-start_time
    #print("sse score : ", sse)
    # NOTE(review): the value printed is elapsed seconds / 100, although the
    # label says "Mean squared error".
    print("Mean squared error : ", elapsed/100)
    cap.release()
    cv2.destroyAllWindows()
示例#19
0
    )

    train_data_args = dict(
        batch_size=64,
        shuffle=True,
        to_train=True,
    )

    val_data_args = dict(
        batch_size=train_data_args['batch_size'] * 4,
        shuffle=False,
        validate_step_size=1,
    )

    dataset: BaseDataset = create_dataset(dataset_args,
                                          train_data_args,
                                          val_data_args)

    device = torch.device(f"cuda:0" if torch.cuda.is_available() else "cpu")

    eval_model.load_state_dict(torch.load(
        './logs/2019-12-22T02:24:08.329024_mode_classification_model_ConvNetSimple_dataset_MNIST_subset_1.0_bs_64_name_Adam_lr_0.001/epoch_0032-model-val_accuracy_99.11754911754912.pth'))
    eval_model = torch.nn.Sequential(*list(eval_model.children())[:4])

    start = time.time()
    outdir = './logs/fretchet_score'
    transform = None

    callback = FrechetInceptionScoreCallback(outdir='./logs/frechet_score',
                                             device=device,
                                             classifier=eval_model,
示例#20
0
from PIL import Image, ImageDraw, ImageFont
# Noto Sans Bold at size 12, loaded once at import time.
font = ImageFont.truetype("./font/NotoSans-Bold.ttf", 12)

import random

# for object-tracker
import dlib

# import video_pose

####################

# Load the multi-person pose configuration and build the dataset from it.
cfg = load_config("demo/pose_cfg_multi.yaml")

dataset = create_dataset(cfg)

# Spatial model built from the same configuration.
sm = SpatialModel(cfg)
sm.load()

draw_multi = PersonDraw()

# Load and setup CNN part detector
sess, inputs, outputs = predict.setup_pose_prediction(cfg)

##########
## Get the source of video

# Command-line options.
# NOTE(review): `ap` is imported outside this excerpt; presumably it is an
# alias for argparse - confirm.
parser = ap.ArgumentParser()
parser.add_argument('-f', "--videoFile", help="Path to Video File")
parser.add_argument('-w', "--videoWidth", help="Width of Output Video")
示例#21
0
def objective(arguments):
    """Main pipeline for training and cross-validation.

    ToDo - Testing will be done separately in test.py.

    Sets up the result directory, logging, Tensorboard and random seeds,
    builds the model, loss, optimizer, dataset and callbacks, then loops
    over the epochs: an optional training pass followed by a validation
    pass every ``validate_step_size`` epochs. Whenever the validation
    accuracy improves, the model weights are saved and the previously saved
    best file is deleted.

    Args:
        arguments: Mapping with the run configuration. Keys read here:
            ``random_seed``, ``model_args``, ``loss_args``,
            ``optimizer_args``, ``dataset_args``, ``train_data_args``
            (including ``to_train``), ``val_data_args`` (including
            ``validate_step_size``) and ``nb_epochs``.

    Returns:
        The highest validation accuracy (in percent) seen during the run,
        or 0 if no validation pass beat that value.
    """
    # Setup result directory and enable logging to file in it
    outdir = make_results_dir(arguments)
    logger.init(outdir, logging.INFO)
    logger.info('Arguments:\n{}'.format(pformat(arguments)))

    # Initialize Tensorboard
    tensorboard_writer = initialize_tensorboard(outdir)

    # Set random seed throughout python, pytorch and numpy
    seed = arguments['random_seed']
    logger.info('Using Random Seed value as: %d' % seed)
    torch.manual_seed(seed)  # Set for pytorch, used for cuda as well.
    random.seed(seed)  # Set for python
    np.random.seed(seed)  # Set for numpy

    # Set device - cpu or gpu
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logger.info(f'Using device - {device}')

    # Load Model with weights(if available)
    model: torch.nn.Module = get_model(arguments.get('model_args')).to(device)

    # Create loss function
    criterion = create_loss(arguments['loss_args'])

    # Create optimizer
    optimizer = create_optimizer(model.parameters(),
                                 arguments['optimizer_args'])

    # Load parameters for the Dataset
    dataset: BaseDataset = create_dataset(arguments['dataset_args'],
                                          arguments['train_data_args'],
                                          arguments['val_data_args'])

    # Generate all callbacks
    callbacks: List[Callbacks] = generate_callbacks(arguments, dataset, device,
                                                    outdir)

    # Debug the inputs to model and save graph to tensorboard
    dataset.debug()
    dummy_input = (torch.rand(
        1,
        arguments['dataset_args']['name'].value['channels'],
        *arguments['dataset_args']['name'].value['image_size'],
    )).to(device)
    tensorboard_writer.save_graph(model, dummy_input)

    # Pipeline - loop over the dataset multiple times
    max_validation_accuracy = 0
    itr = 0

    best_model_path = None
    delete_old_models = True

    run_callbacks(callbacks,
                  model=model,
                  optimizer=optimizer,
                  mode=CallbackMode.ON_TRAIN_BEGIN)
    for epoch in range(arguments['nb_epochs']):
        # Train the model
        train_data_args = arguments['train_data_args']
        if train_data_args['to_train']:
            train_dataloader = dataset.train_dataloader
            progress_bar = ProgressBar(
                target=len(train_dataloader),
                clear=True,
                description=f"Training {epoch + 1}/{arguments['nb_epochs']}: ")
            loss_running_average = RunningAverage()

            run_callbacks(callbacks,
                          model=model,
                          optimizer=optimizer,
                          mode=CallbackMode.ON_EPOCH_BEGIN,
                          epoch=epoch)
            model.train()
            for i, data in enumerate(train_dataloader, 0):
                # get the inputs
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Forward Pass
                outputs = model(inputs)

                classification_loss = criterion(outputs, labels)
                # Convert to a Python float once and reuse it everywhere
                # below. 'Training_Loss' used to receive the live tensor,
                # unlike 'Classification_Loss'.
                loss_value = classification_loss.item()
                tensorboard_writer.save_scalar('Classification_Loss',
                                               loss_value, itr)
                classification_loss.backward()
                optimizer.step()

                # Compute running loss. Not exact but efficient.
                running_loss = loss_running_average.add_new_sample(loss_value)
                progress_bar.update(i + 1, [
                    ('current loss', loss_value),
                    ('running loss', running_loss),
                ])
                tensorboard_writer.save_scalar('Training_Loss', loss_value,
                                               itr)
                itr += 1

            # Callbacks ON_EPOCH_END should be run only when training is enabled. Thus call here.
            run_callbacks(callbacks,
                          model=model,
                          optimizer=optimizer,
                          mode=CallbackMode.ON_EPOCH_END,
                          epoch=epoch)

        # Validate the model
        val_data_args = arguments['val_data_args']
        if val_data_args['validate_step_size'] > 0 and \
                epoch % val_data_args['validate_step_size'] == 0:
            correct, total = 0, 0
            validation_dataloader = dataset.validation_dataloader
            progress_bar = ProgressBar(
                target=len(validation_dataloader),
                clear=True,
                description=f"Validating {epoch + 1}/{arguments['nb_epochs']}: "
            )
            model.eval()
            with torch.no_grad():
                for i, data in enumerate(validation_dataloader, 0):
                    inputs, labels = data
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    outputs = model(inputs)
                    # Index of the highest score per sample = predicted class.
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

                    progress_bar.update(i + 1, [
                        ('Batch Accuracy', 100 * correct / total),
                    ])

            val_accuracy = 100 * correct / total
            tensorboard_writer.save_scalar('Validation_Accuracy', val_accuracy,
                                           itr)
            # NOTE(review): `%%` is not collapsed by an f-string, and
            # `get_val_dataset_size` is interpolated without being called;
            # whether either is intended depends on the project logger and
            # dataset class - confirm.
            logger.info(
                f'Accuracy of the network on the {dataset.get_val_dataset_size} validation images: {val_accuracy} %%'
            )

            # Save Model
            if val_accuracy > max_validation_accuracy:
                if delete_old_models and best_model_path:
                    delete_old_file(best_model_path)
                best_model_path = os.path.join(
                    outdir,
                    f'epoch_{epoch:04}-model-val_accuracy_{val_accuracy}.pth')
                torch.save(model.state_dict(), best_model_path)
                max_validation_accuracy = val_accuracy

        tensorboard_writer.flush()

        # Exit loop if training not needed
        if not train_data_args['to_train']:
            break

    run_callbacks(callbacks,
                  model=model,
                  optimizer=optimizer,
                  mode=CallbackMode.ON_TRAIN_END)

    logger.info('Finished Training')
    close_tensorboard()
    logger.info(f'Max Validation accuracy is {max_validation_accuracy}')
    return max_validation_accuracy  # Return in case later u wanna add hyperopt.