Example #1
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu(), axes=None):
    x, img = gcv.data.transforms.presets.yolo.transform_test(img,
                                                             short=512,
                                                             max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(img,
                                                       class_IDs,
                                                       scores,
                                                       bounding_boxs,
                                                       ctx=ctx)
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)

        axes = plot_keypoints(img,
                              pred_coords,
                              confidence,
                              class_IDs,
                              bounding_boxs,
                              scores,
                              box_thresh=0.5,
                              keypoint_thresh=0.2,
                              ax=axes)
        plt.draw()
        plt.pause(0.001)
    else:
        axes = plot_image(img, ax=axes)
        plt.draw()
        plt.pause(0.001)

    return axes
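
# Hedged usage sketch for the function above. The model choices and webcam
# capture below are assumptions, not part of the original example.
import cv2
import gluoncv as gcv
import mxnet as mx

detector = gcv.model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True)
pose_net = gcv.model_zoo.get_model('simple_pose_resnet18_v1b', pretrained=True)
detector.reset_class(classes=['person'], reuse_weights={'person': 'person'})

cap = cv2.VideoCapture(0)
axes = None
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
    axes = keypoint_detection(frame, detector, pose_net, axes=axes)
cap.release()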
Example #2

def keypoint_detection(i,
                       frame,
                       imagepath,
                       detector,
                       pose_net,
                       ctx=mx.cpu(),
                       axes=None):

    global pause_time

    x, img = gcv.data.transforms.presets.yolo.transform_test(frame,
                                                             short=512,
                                                             max_size=1024)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(img,
                                                       class_IDs,
                                                       scores,
                                                       bounding_boxs,
                                                       output_shape=(1024,
                                                                     768),
                                                       ctx=ctx)

    #print(pose_input,"\n")
    if len(upscale_bbox) > 0:

        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)

        hackathon_action(i, frame, imagepath, pred_coords, confidence,
                         class_IDs, bounding_boxs, scores)

        axes = plot_keypoints(img,
                              pred_coords,
                              confidence,
                              class_IDs,
                              bounding_boxs,
                              scores,
                              box_thresh=0.5,
                              keypoint_thresh=0.2,
                              ax=axes)
        plt.draw()
        plt.pause(pause_time)
        #plt.pause(1.0)
    else:
        axes = plot_image(frame, ax=axes)
        plt.draw()
        plt.pause(pause_time)

    return axes
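
# Hedged driver sketch for the variant above. The frame directory, glob
# pattern, and pause value are assumptions; ``hackathon_action`` comes from
# elsewhere in the original project.
import glob
import mxnet as mx

pause_time = 0.001
axes = None
for i, imagepath in enumerate(sorted(glob.glob('frames/*.jpg'))):
    frame = mx.image.imread(imagepath)
    axes = keypoint_detection(i, frame, imagepath, detector, pose_net,
                              axes=axes)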
Example #3
def work():
    train_trans = transforms.Compose(
        [transforms.RandomResizedCrop(224),
         transforms.ToTensor()])
    # https://gluon-cv.mxnet.io/build/examples_datasets/imagenet.html
    # You need to specify ``root`` for ImageNet if you extracted the images
    # (plain *.jpg files rather than .rec records) into a different folder.
    train_data = DataLoader(
        ImageNet(train=True,
                 root="/home1/ImageNet_ILSVRC2012/ILSVRC2012_img_train/"
                 ).transform_first(train_trans),
        batch_size=128,
        shuffle=True)

    for x, y in train_data:
        print(x.shape, y.shape)
        break
    from gluoncv.utils import viz

    val_dataset = ImageNet(train=False)
    viz.plot_image(val_dataset[1234][0])  # index 0 is image, 1 is label
    viz.plot_image(val_dataset[4567][0])
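
    # Hedged variant (hypothetical name ``train_trans_norm``): most ImageNet
    # pipelines also flip and normalize with the standard channel statistics.
    train_trans_norm = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])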
Example #4
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu(), axes=None):
    x, img = gcv.data.transforms.presets.yolo.transform_test(img, short=512, max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs,
                                                       output_shape=(128, 96), ctx=ctx)
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

        axes = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2, ax=axes)
        plt.draw()
        plt.pause(0.001)
    else:
        axes = plot_image(img, ax=axes)
        plt.draw()
        plt.pause(0.001)

    return axes
Example #5
test_data = gluon.data.DataLoader(gluon.data.vision.MNIST(
    'dataset/MNIST', train=False, transform=transformer),
                                  batch_size=batch_size,
                                  shuffle=True,
                                  last_batch='discard')

# inspect the data
for data, label in train_data:
    break

print(data.shape, label.shape)

### graph
from gluoncv.utils import viz

viz.plot_image(data[0][0])  # first channel of the first sample in the batch

### optimization
net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)

### trainer
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'momentum': 0.9,
    'learning_rate': .1
})

# loss function
loss_function = gluon.loss.SoftmaxCrossEntropyLoss()


def evaluate_accuracy(data_iterator, net):
    # Standard Gluon accuracy loop; assumes the ``ctx`` defined above.
    acc = mx.metric.Accuracy()
    for data, label in data_iterator:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        predictions = mx.nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]
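
# Hedged sketch of the training loop these pieces feed into; the epoch count
# is an assumption.
epochs = 5
for e in range(epochs):
    for data, label in train_data:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with mx.autograd.record():
            output = net(data)
            loss = loss_function(output, label)
        loss.backward()
        trainer.step(data.shape[0])
    print('epoch %d, test acc %.4f' % (e, evaluate_accuracy(test_data, net)))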
Example #6
# You need to specify ``setting`` and ``root`` for UCF101 if you decoded the video frames into a different folder.
train_dataset = ucf101.classification.UCF101(train=True,
                                             transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
for x, y in train_data:
    print('Video frame size (batch, height, width, RGB):', x.shape)
    print('Video label:', y.shape)
    break
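
# Hedged aside: without ToTensor in the transform, the batch above is raw
# uint8 in (batch, height, width, RGB) order; a typical conversion to
# normalized NCHW floats looks like this:
x = x.astype('float32').transpose((0, 3, 1, 2)) / 255.0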

#########################################################################
# Plot several training samples. index 0 is image, 1 is label
from gluoncv.utils import viz

viz.plot_image(train_dataset[7][0])  # Basketball
viz.plot_image(train_dataset[22][0])  # CricketBowling

#########################################################################
"""Here is another example that randomly reads 25 videos each time, randomly selects one clip per video and
performs center cropping. A clip can contain N consecutive frames, e.g., N=5.
"""
train_dataset = ucf101.classification.UCF101(train=True,
                                             new_length=5,
                                             transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
for x, y in train_data:
    print('Video frame size (batch, height, width, RGB):', x.shape)
    print('Video label:', y.shape)
    break
Example #7
test_data = gluon.data.DataLoader(
    gluon.data.vision.datasets.ImageFolderDataset(test_path,
                                                  transform=transformer),
    batch_size=batch_size,
    shuffle=False,
    last_batch='discard')

for d, l in train_data:
    break

print(d.shape, l.shape)

### graph
from gluoncv.utils import viz
viz.plot_image(d[63][2])  # one channel of sample 63 in the batch

from mxnet.gluon.model_zoo import vision
net = vision.alexnet(classes=10, pretrained=False, ctx=ctx)

# The model_zoo AlexNet above is discarded; the network is rebuilt by hand:
net = gluon.nn.Sequential()
# hidden layer 1 (channels=96, kernel=11, padding=1, stride=4, activation=relu)
# max pooling (size=3, stride=2)
# input size (224, 224); with these settings the output is (26, 26)
# (the classic 27x27 AlexNet figure assumes a 227x227 input)
net.add(
    gluon.nn.Conv2D(96,
                    kernel_size=11,
                    padding=1,
                    strides=4,
                    activation='relu'))
net.add(gluon.nn.MaxPool2D(pool_size=3, strides=2))
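
# Quick shape check for the block above (a throwaway sketch; assumes
# ``import mxnet as mx`` from the surrounding script):
net.initialize(mx.init.Xavier())
probe = mx.nd.zeros((1, 3, 224, 224))
print(net(probe).shape)  # -> (1, 96, 26, 26)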
Example #8
"""


from gluoncv.data import ImageNet
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision import transforms

train_trans = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor()
])

# You need to specify ``root`` for ImageNet if you extracted the images into
# a different folder
train_data = DataLoader(
    ImageNet(train=True).transform_first(train_trans),
    batch_size=128, shuffle=True)

#########################################################################
for x, y in train_data:
    print(x.shape, y.shape)
    break


#########################################################################
# Plot some validation images
from gluoncv.utils import viz
val_dataset = ImageNet(train=False)
viz.plot_image(val_dataset[1234][0])  # index 0 is image, 1 is label
viz.plot_image(val_dataset[4567][0])
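
# Hedged companion sketch: validation typically uses a deterministic resize
# and center crop instead of RandomResizedCrop.
val_trans = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor()
])
val_data = DataLoader(val_dataset.transform_first(val_trans),
                      batch_size=128, shuffle=False)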
Example #9
# You need to specify ``setting`` and ``root`` for UCF101 if you decoded the video frames into a different folder.
train_dataset = UCF101(train=True, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
# We can see the shape of our loaded data as below. ``extra`` indicates if we select multiple crops or multiple segments
# from a video. Here, we only pick one frame per video, so the ``extra`` dimension is 1.
for x, y in train_data:
    print('Video frame size (batch, extra, channel, height, width):', x.shape)
    print('Video label:', y.shape)
    break

#########################################################################
# Let's plot several training samples. index 0 is image, 1 is label
from gluoncv.utils import viz
viz.plot_image(train_dataset[7][0].squeeze().transpose(
    (1, 2, 0)) * 255.0)  # Basketball
viz.plot_image(train_dataset[22][0].squeeze().transpose(
    (1, 2, 0)) * 255.0)  # CricketBowling

#########################################################################
# Here is the second example that randomly reads 25 videos each time, randomly selects one clip per video and
# performs center cropping. A clip can contain N consecutive frames, e.g., N=5.

train_dataset = UCF101(train=True, new_length=5, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
# We can see the shape of our loaded data as below. Now we have another ``depth`` dimension which
# indicates how many frames in each clip (a.k.a, the temporal dimension).
for x, y in train_data:
    print('Video frame size (batch, extra, channel, depth, height, width):',
          x.shape)
    print('Video label:', y.shape)
    break
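
# Hedged aside: with one clip per video the ``extra`` axis has size 1, so it
# can be squeezed away before feeding a 3D CNN:
clip = x.squeeze(axis=1)  # -> (batch, channel, depth, height, width)
print(clip.shape)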
Example #10
# You need to specify ``setting`` and ``root`` for HMDB51 if you decoded the video frames into a different folder.
train_dataset = HMDB51(train=True, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
# We can see the shape of our loaded data as below. ``extra`` indicates if we select multiple crops or multiple segments
# from a video. Here, we only pick one frame per video, so the ``extra`` dimension is 1.
for x, y in train_data:
    print('Video frame size (batch, extra, channel, height, width):', x.shape)
    print('Video label:', y.shape)
    break

#########################################################################
# Let's plot several training samples. index 0 is image, 1 is label
from gluoncv.utils import viz
viz.plot_image(train_dataset[500][0].squeeze().transpose(
    (1, 2, 0)) * 255.0)  # dive
viz.plot_image(train_dataset[2500][0].squeeze().transpose(
    (1, 2, 0)) * 255.0)  # shoot_bow

#########################################################################
# Here is the second example that randomly reads 25 videos each time, randomly selects one clip per video and
# performs center cropping. A clip can contain N consecutive frames, e.g., N=5.

train_dataset = HMDB51(train=True, new_length=5, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
# We can see the shape of our loaded data as below. Now we have another ``depth`` dimension which
# indicates how many frames in each clip (a.k.a, the temporal dimension).
for x, y in train_data:
    print('Video frame size (batch, extra, channel, depth, height, width):',
          x.shape)
    print('Video label:', y.shape)
    break
Example #11
# You need to specify ``root`` for ImageNet if you extracted the images into
# a different folder
train_data = ImageRecordIter(
    path_imgrec = os.path.join(rec_path, 'train.rec'),
    path_imgidx = os.path.join(rec_path, 'train.idx'),
    data_shape  = (3, 224, 224),
    batch_size  = 32,
    shuffle     = True
)

#########################################################################
for batch in train_data:
    print(batch.data[0].shape, batch.label[0].shape)
    break

#########################################################################
# Plot some validation images
from gluoncv.utils import viz
val_data = ImageRecordIter(
    path_imgrec = os.path.join(rec_path, 'val.rec'),
    path_imgidx = os.path.join(rec_path, 'val.idx'),
    data_shape  = (3, 224, 224),
    batch_size  = 32,
    shuffle     = False
)
for batch in val_data:
    viz.plot_image(nd.transpose(batch.data[0][12], (1, 2, 0)))
    viz.plot_image(nd.transpose(batch.data[0][21], (1, 2, 0)))
    break
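
# Hedged aside: ImageRecordIter is a DataIter, not a Dataset, so it must be
# reset before each new epoch (the one-batch demos above never hit this):
for epoch in range(2):  # epoch count is arbitrary
    train_data.reset()  # rewind the iterator
    for batch in train_data:
        pass  # a real training step would go here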
Example #12

# The loader below assumes the same ``test_path`` and ``transformer`` setup
# as Example #7; the snippet's opening is truncated in the source.
test_data = gluon.data.DataLoader(
    gluon.data.vision.datasets.ImageFolderDataset(test_path,
                                                  transform=transformer),
    batch_size=batch_size,
    shuffle=False,
    last_batch='discard')

for d, l in train_data:
    break

print(d.shape, l.shape)

for da, la in test_data:
    break
print(da.shape, la.shape)
########################################################################################################################
### graph
from gluoncv.utils import viz
viz.plot_image(d[2][1])  # one channel of sample 2 in the batch
viz.plot_image(d[1][0])

########################################################################################################################
### model
net = nn.HybridSequential()
with net.name_scope():
    net.add(
        #
        nn.Conv2D(channels=96, kernel_size=11, strides=4, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        #
        nn.Conv2D(channels=256, kernel_size=5, padding=2, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        #
        nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'),
    )