def validation(model, criterion, valid_loader):
    model.eval()
    losses = []
    f2_scores = []
    for inputs, targets in valid_loader:
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        losses.append(loss.data[0])
        f2_scores.append(f2_score(y_true=targets.data.cpu().numpy(), y_pred=F.sigmoid(outputs).data.cpu().numpy() > 0.2))
    valid_loss = np.mean(losses)  # type: float
    valid_f2 = np.mean(f2_scores)  # type: float
    print('Valid loss: {:.4f}, F2: {:.4f}'.format(valid_loss, valid_f2))
    return {'valid_loss': valid_loss, 'valid_f2': valid_f2}
示例#2
0
def validation(model, criterion, valid_loader):
    model.eval()
    losses = []
    f2_scores = []
    for inputs, targets in valid_loader:
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        losses.append(loss.data[0])
        f2_scores.append(
            f2_score(y_true=targets.data.cpu().numpy(),
                     y_pred=F.sigmoid(outputs).data.cpu().numpy() > 0.2))
    valid_loss = np.mean(losses)  # type: float
    valid_f2 = np.mean(f2_scores)  # type: float
    print('Valid loss: {:.4f}, F2: {:.4f}'.format(valid_loss, valid_f2))
    return {'valid_loss': valid_loss, 'valid_f2': valid_f2}
def link_key_point(img_canvas, candidate, subset, stickwidth=4):
    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limb_seq[i]) - 1]
            if -1 in index:
                continue
            cur_canvas = img_canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            img_canvas = cv2.addWeighted(img_canvas, 0.4, cur_canvas, 0.6, 0)

    return img_canvas
示例#4
0
def link_key_point(img_canvas, candidate, subset, stickwidth=4):
    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limb_seq[i]) - 1]
            if -1 in index:
                continue
            cur_canvas = img_canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly(
                (int(mY), int(mX)), (int(length / 2), stickwidth), int(angle),
                0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            img_canvas = cv2.addWeighted(img_canvas, 0.4, cur_canvas, 0.6, 0)

    return img_canvas
示例#5
0
                                      cooldown=2,
                                      verbose=1,
                                      min_lr=1e-5 * lr,
                                      factor=adaptive_lr_factor)

        for epoch in range(1, num_epochs + 1):
            print("Begin epoch {}/{}".format(epoch, num_epochs))
            epoch_losses, epoch_f2 = train_epoch(
                train_loader,
                model,
                loss_fn,
                optimizer,
                dtype,
                sigmoid_threshold=sigmoid_threshold,
                print_every=20)
            scheduler.step(np.mean(epoch_losses), epoch)
            ## f2 score for validation dataset
            f2_acc = validate_epoch(model,
                                    val_loader,
                                    dtype,
                                    sigmoid_threshold=sigmoid_threshold)
            ## store results
            train_acc_history += epoch_f2
            val_acc_history.append(f2_acc)
            loss_history += epoch_losses
            ## overwrite the model .pkl file every epoch
            torch.save(model.state_dict(), save_model_path)
            save_accuracy_and_loss_mat(save_mat_path,
                                       train_acc_history,
                                       val_acc_history,
                                       loss_history,
示例#6
0
canvas = cv2.imread(test_image)  # B,G,R order
for i in range(18):
    for j in range(len(all_peaks[i])):
        cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1)

stickwidth = 4

for i in range(17):
    for n in range(len(subset)):
        index = subset[n][np.array(limbSeq[i]) - 1]
        if -1 in index:
            continue
        cur_canvas = canvas.copy()
        Y = candidate[index.astype(int), 0]
        X = candidate[index.astype(int), 1]
        mX = np.mean(X)
        mY = np.mean(Y)
        length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5
        angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
        polygon = cv2.ellipse2Poly(
            (int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0,
            360, 1)
        cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

#Parallel(n_jobs=1)(delayed(handle_one)(i) for i in range(18))

toc = time.time()
print 'time is %.5f' % (toc - tic)
cv2.imwrite('result.png', canvas)
示例#7
0
    def post_processing(self, humans):
        for h in humans:
            '''
			0 1 nose
			2 3 neck
			4 5 r_shoulder
			6 7 r_elbow
			8 9 r_wrist
			10 11 l_shoulder
			12 13 l_elbow
			14 15 l_wrist
			16 17 r_pelvis
			18 19 r_knee
			20 21 r_anckle
			22 23 l_pervis
			24 25 l_knee
			26 27 l_ankle
			28 29 r_eye
			30 31 l_eye
			32 33 r_ear
			34 35 l_ear
			'''
            #rasing hand
            if h.joints[10] >= 0 and h.joints[12] >= 0 and h.joints[
                    10] > h.joints[12]:
                h.isLWaving = 1
                h.tags.append('lwaving')

            if h.joints[4] >= 0 and h.joints[6] >= 0 and h.joints[
                    4] > h.joints[6]:
                h.isRWaving = 1
                h.tags.append('rwaving')

            if h.isLWaving == 1 or h.isRWaving == 1:
                h.isWaving = 1
                h.tags.append('waving')
            '''
			#rasing hand
			if h.joints[10] >= 0 and h.joints[14] >= 0 and h.joints[10] > h.joints[14]+self.params['waving_thr']:
				h.isLWaving = 1
				h.tags.append('lwaving')

			if h.joints[4] >= 0 and h.joints[8] >= 0 and h.joints[4] > h.joints[8]+self.params['waving_thr']:
				h.isRWaving = 1
				h.tags.append('rwaving')
			
			if h.isLWaving == 1 or h.isRWaving == 1:
				h.isWaving = 1
				h.tags.append('waving')
			'''
            #sitting
            shoulder_h = -9999
            cropped_cloud = self.cvbridge.imgmsg_to_cv2(
                h.cropped_cloud, desired_encoding="passthrough")
            '''
			#adjust positions
			valid_joints = []
			for i in range(0,18):
				if h.joints[2*i] != -1 and h.joints[2*i+1] != -1 :
					temp = self.get_pos_wrt_robot(cropped_cloud , h.joints[2*i] , h.joints[2*i+1])
					if (temp != 0).all():
						valid_joints.append( temp )
			if len(valid_joints) > 0 and h.valid_pose == 1 :
				pos_wrt_robot = np.array(valid_joints)
				pos_wrt_robot = np.median(pos_wrt_robot,axis=0)
				print 'pose_wrt_robot : ' , pos_wrt_robot
				h.pose_wrt_robot.position.x = pos_wrt_robot[0]
				h.pose_wrt_robot.position.y = pos_wrt_robot[1]
				h.pose_wrt_robot.position.z = pos_wrt_robot[2]
				pose_wrt_map = self.get_loc(pos_wrt_robot)
				print pose_wrt_map
				h.pose_wrt_map.position.x = pose_wrt_map[0]
				h.pose_wrt_map.position.y = pose_wrt_map[1]
				h.pose_wrt_map.position.z = pose_wrt_map[2]
				pose_wrt_odom = self.get_loc(pos_wrt_robot,target='odom')
				h.pose_wrt_odom.position.x = pose_wrt_odom[0]
				h.pose_wrt_odom.position.y = pose_wrt_odom[1]
				h.pose_wrt_odom.position.z = pose_wrt_odom[2]				
			'''

            if h.joints[4] > 0 and h.joints[5] > 0:
                shoulder_h = max(
                    shoulder_h,
                    self.get_pos_wrt_robot(cropped_cloud, h.joints[4],
                                           h.joints[5])[2])
            if h.joints[10] > 0 and h.joints[11] > 0:
                shoulder_h = max(
                    shoulder_h,
                    self.get_pos_wrt_robot(cropped_cloud, h.joints[10],
                                           h.joints[11])[2])
            if shoulder_h < self.params['sitting_thr'] and shoulder_h > -9999:
                h.isSitting = 1
                h.tags.append('sitting')

            # lying and standing
            knee = np.mean([h.joints[18], h.joints[24]])
            if h.joints[2] >= 0 and knee >= 0 and abs(h.joints[2] -
                                                      knee) <= 20:
                h.isLying = 1
                h.tags.append('lying')

            # pointing
            point_length = 45
            if h.joints[9] >= 0 and h.joints[5] >= 0 and abs(
                    h.joints[5] - h.joints[9]) > point_length:
                if h.joints[5] - h.joints[9] > 0:
                    h.isRPointing = 1
                    h.tags.append('rpointing')
                else:
                    h.isLPointing = 1
                    h.tags.append('lpointing')

            if h.joints[11] >= 0 and h.joints[15] >= 0 and abs(
                    h.joints[15] - h.joints[11]) > point_length:
                if h.joints[15] - h.joints[11] > 0:
                    h.isLPointing = 1
                    h.tags.append('lpointing')
                else:
                    h.isRPointing = 1
                    h.tags.append('rpointing')

            print h.object_id, h.person_name, h.isWaving, h.isSitting, shoulder_h
        return humans
示例#8
0
            #### for batter first
            videos = []

            # Not found until there is a frame with a person detected
            while not found and p < 30:
                #len(df[player][i])==0:
                ret, frame = video_capture.read()
                out = handle_one(
                    frame[top_b:bottom_b,
                          left_b:right_b])  # changed batter first move
                for person in range(len(out)):
                    hips = np.asarray(out[person])[indices]
                    hips = hips[np.sum(hips, axis=1) != 0]
                    if len(hips) == 0:
                        continue
                    mean_hips = np.mean(hips, axis=0)
                    norm = abs(mean_hips[0] - center[0]) + abs(
                        mean_hips[1] - center[1])  #6 hip
                    if norm < old_norm:
                        found = True
                        loc = person
                        old_norm = norm
                p += 1
                if not found:
                    pitcher_array.append([[0, 0] for j in range(18)])
                    print("no person detected in frame", p)
            if not found:
                print("no person detected in first 30 frames")
                continue

            #left_b = 0 # change for batters first move
def handle_one(oriImg):

    # for visualize
    canvas = np.copy(oriImg)
    imageToTest = Variable(T.transpose(
        T.transpose(T.unsqueeze(torch.from_numpy(oriImg).float(), 0), 2, 3), 1,
        2),
                           volatile=True).cuda()
    print oriImg.shape
    scale = model_['boxsize'] / float(oriImg.shape[0])
    print scale
    h = int(oriImg.shape[0] * scale)
    w = int(oriImg.shape[1] * scale)
    pad_h = 0 if (h % model_['stride']
                  == 0) else model_['stride'] - (h % model_['stride'])
    pad_w = 0 if (w % model_['stride']
                  == 0) else model_['stride'] - (w % model_['stride'])
    new_h = h + pad_h
    new_w = w + pad_w

    imageToTest = cv2.resize(oriImg, (0, 0),
                             fx=scale,
                             fy=scale,
                             interpolation=cv2.INTER_CUBIC)
    imageToTest_padded, pad = util.padRightDownCorner(imageToTest,
                                                      model_['stride'],
                                                      model_['padValue'])
    imageToTest_padded = np.transpose(
        np.float32(imageToTest_padded[:, :, :, np.newaxis]),
        (3, 2, 0, 1)) / 256 - 0.5

    feed = Variable(T.from_numpy(imageToTest_padded)).cuda()

    output1, output2 = model(feed)

    heatmap = nn.UpsamplingBilinear2d(
        (oriImg.shape[0], oriImg.shape[1])).cuda()(output2)

    paf = nn.UpsamplingBilinear2d(
        (oriImg.shape[0], oriImg.shape[1])).cuda()(output1)

    print heatmap.size()
    print paf.size()
    print type(heatmap)
    heatmap_avg = T.transpose(T.transpose(heatmap[0], 0, 1), 1,
                              2).data.cpu().numpy()
    paf_avg = T.transpose(T.transpose(paf[0], 0, 1), 1, 2).data.cpu().numpy()

    all_peaks = []
    peak_counter = 0

    #maps =
    for part in range(18):
        map_ori = heatmap_avg[:, :, part]
        map = gaussian_filter(map_ori, sigma=3)

        map_left = np.zeros(map.shape)
        map_left[1:, :] = map[:-1, :]
        map_right = np.zeros(map.shape)
        map_right[:-1, :] = map[1:, :]
        map_up = np.zeros(map.shape)
        map_up[:, 1:] = map[:, :-1]
        map_down = np.zeros(map.shape)
        map_down[:, :-1] = map[:, 1:]

        peaks_binary = np.logical_and.reduce(
            (map >= map_left, map >= map_right, map >= map_up, map >= map_down,
             map > param_['thre1']))
        #    peaks_binary = T.eq(
        #    peaks = zip(T.nonzero(peaks_binary)[0],T.nonzero(peaks_binary)[0])

        peaks = zip(np.nonzero(peaks_binary)[1],
                    np.nonzero(peaks_binary)[0])  # note reverse

        peaks_with_score = [x + (map_ori[x[1], x[0]], ) for x in peaks]
        id = range(peak_counter, peak_counter + len(peaks))
        peaks_with_score_and_id = [
            peaks_with_score[i] + (id[i], ) for i in range(len(id))
        ]

        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)

    connection_all = []
    special_k = []
    mid_num = 10

    for k in range(len(mapIdx)):
        score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
        candA = all_peaks[limbSeq[k][0] - 1]
        candB = all_peaks[limbSeq[k][1] - 1]
        nA = len(candA)
        nB = len(candB)
        indexA, indexB = limbSeq[k]
        if (nA != 0 and nB != 0):
            connection_candidate = []
            for i in range(nA):
                for j in range(nB):
                    vec = np.subtract(candB[j][:2], candA[i][:2])
                    norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                    vec = np.divide(vec, norm)

                    startend = zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
                                   np.linspace(candA[i][1], candB[j][1], num=mid_num))

                    vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
                                      for I in range(len(startend))])
                    vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
                                      for I in range(len(startend))])

                    score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(
                        vec_y, vec[1])
                    score_with_dist_prior = sum(
                        score_midpts) / len(score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                    criterion1 = len(
                        np.nonzero(score_midpts > param_['thre2'])
                        [0]) > 0.8 * len(score_midpts)
                    criterion2 = score_with_dist_prior > 0
                    if criterion1 and criterion2:
                        connection_candidate.append([
                            i, j, score_with_dist_prior,
                            score_with_dist_prior + candA[i][2] + candB[j][2]
                        ])

            connection_candidate = sorted(connection_candidate,
                                          key=lambda x: x[2],
                                          reverse=True)
            connection = np.zeros((0, 5))
            for c in range(len(connection_candidate)):
                i, j, s = connection_candidate[c][0:3]
                if (i not in connection[:, 3] and j not in connection[:, 4]):
                    connection = np.vstack(
                        [connection, [candA[i][3], candB[j][3], s, i, j]])
                    if (len(connection) >= min(nA, nB)):
                        break

            connection_all.append(connection)
        else:
            special_k.append(k)
            connection_all.append([])

    # last number in each row is the total parts number of that person
    # the second last number in each row is the score of the overall configuration
    subset = -1 * np.ones((0, 20))
    candidate = np.array([item for sublist in all_peaks for item in sublist])

    for k in range(len(mapIdx)):
        if k not in special_k:
            partAs = connection_all[k][:, 0]
            partBs = connection_all[k][:, 1]
            indexA, indexB = np.array(limbSeq[k]) - 1

            for i in range(len(connection_all[k])):  #= 1:size(temp,1)
                found = 0
                subset_idx = [-1, -1]
                for j in range(len(subset)):  #1:size(subset,1):
                    if subset[j][indexA] == partAs[i] or subset[j][
                            indexB] == partBs[i]:
                        subset_idx[found] = j
                        found += 1

                if found == 1:
                    j = subset_idx[0]
                    if (subset[j][indexB] != partBs[i]):
                        subset[j][indexB] = partBs[i]
                        subset[j][-1] += 1
                        subset[j][-2] += candidate[partBs[i].astype(int),
                                                   2] + connection_all[k][i][2]
                elif found == 2:  # if found 2 and disjoint, merge them
                    j1, j2 = subset_idx
                    print "found = 2"
                    membership = ((subset[j1] >= 0).astype(int) +
                                  (subset[j2] >= 0).astype(int))[:-2]
                    if len(np.nonzero(membership == 2)[0]) == 0:  #merge
                        subset[j1][:-2] += (subset[j2][:-2] + 1)
                        subset[j1][-2:] += subset[j2][-2:]
                        subset[j1][-2] += connection_all[k][i][2]
                        subset = np.delete(subset, j2, 0)
                    else:  # as like found == 1
                        subset[j1][indexB] = partBs[i]
                        subset[j1][-1] += 1
                        subset[j1][-2] += candidate[
                            partBs[i].astype(int), 2] + connection_all[k][i][2]

                # if find no partA in the subset, create a new subset
                elif not found and k < 17:
                    row = -1 * np.ones(20)
                    row[indexA] = partAs[i]
                    row[indexB] = partBs[i]
                    row[-1] = 2
                    row[-2] = sum(
                        candidate[connection_all[k][i, :2].astype(int),
                                  2]) + connection_all[k][i][2]
                    subset = np.vstack([subset, row])

    # delete some rows of subset which has few parts occur
    deleteIdx = []
    for i in range(len(subset)):
        if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
            deleteIdx.append(i)
    subset = np.delete(subset, deleteIdx, axis=0)

    #    canvas = cv2.imread(test_image) # B,G,R order
    for i in range(18):
        for j in range(len(all_peaks[i])):
            cv2.circle(canvas,
                       all_peaks[i][j][0:2],
                       4,
                       colors[i],
                       thickness=-1)

    stickwidth = 4

    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i]) - 1]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly(
                (int(mY), int(mX)), (int(length / 2), stickwidth), int(angle),
                0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

    return canvas
示例#10
0
def main(args):
    # hyperparameters
    batch_size = args.batch_size
    num_workers = 1

    # Image Preprocessing
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # load COCOs dataset
    IMAGES_PATH = 'data/train2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_train2014.json'

    vocab = load_vocab()
    train_loader = get_coco_data_loader(path=IMAGES_PATH,
                                        json=CAPTION_FILE_PATH,
                                        vocab=vocab,
                                        transform=transform,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=num_workers)

    IMAGES_PATH = 'data/val2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_val2014.json'
    val_loader = get_coco_data_loader(path=IMAGES_PATH,
                                      json=CAPTION_FILE_PATH,
                                      vocab=vocab,
                                      transform=transform,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers)

    losses_val = []
    losses_train = []

    # Build the models
    ngpu = 1
    initial_step = initial_epoch = 0
    embed_size = args.embed_size
    num_hiddens = args.num_hidden
    learning_rate = 1e-3
    num_epochs = 3
    log_step = args.log_step
    save_step = 500
    checkpoint_dir = args.checkpoint_dir

    encoder = CNN(embed_size)
    decoder = RNN(embed_size,
                  num_hiddens,
                  len(vocab),
                  1,
                  rec_unit=args.rec_unit)

    # Loss
    criterion = nn.CrossEntropyLoss()

    if args.checkpoint_file:
        encoder_state_dict, decoder_state_dict, optimizer, *meta = utils.load_models(
            args.checkpoint_file, args.sample)
        initial_step, initial_epoch, losses_train, losses_val = meta
        encoder.load_state_dict(encoder_state_dict)
        decoder.load_state_dict(decoder_state_dict)
    else:
        params = list(decoder.parameters()) + list(
            encoder.linear.parameters()) + list(encoder.batchnorm.parameters())
        optimizer = torch.optim.Adam(params, lr=learning_rate)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    if args.sample:
        return utils.sample(encoder, decoder, vocab, val_loader)

    # Train the Models
    total_step = len(train_loader)
    try:
        for epoch in range(initial_epoch, num_epochs):

            for step, (images, captions,
                       lengths) in enumerate(train_loader, start=initial_step):

                # Set mini-batch dataset
                images = utils.to_var(images, volatile=True)
                captions = utils.to_var(captions)
                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]

                # Forward, Backward and Optimize
                decoder.zero_grad()
                encoder.zero_grad()

                if ngpu > 1:
                    # run on multiple GPU
                    features = nn.parallel.data_parallel(
                        encoder, images, range(ngpu))
                    outputs = nn.parallel.data_parallel(
                        decoder, features, range(ngpu))
                else:
                    # run on single GPU
                    features = encoder(images)
                    outputs = decoder(features, captions, lengths)

                train_loss = criterion(outputs, targets)
                losses_train.append(train_loss.data[0])
                train_loss.backward()
                optimizer.step()

                # Run validation set and predict
                if step % log_step == 0:
                    encoder.batchnorm.eval()
                    # run validation set
                    batch_loss_val = []
                    for val_step, (images, captions,
                                   lengths) in enumerate(val_loader):
                        images = utils.to_var(images, volatile=True)
                        captions = utils.to_var(captions, volatile=True)

                        targets = pack_padded_sequence(captions,
                                                       lengths,
                                                       batch_first=True)[0]
                        features = encoder(images)
                        outputs = decoder(features, captions, lengths)
                        val_loss = criterion(outputs, targets)
                        batch_loss_val.append(val_loss.data[0])

                    losses_val.append(np.mean(batch_loss_val))

                    # predict
                    sampled_ids = decoder.sample(features)
                    sampled_ids = sampled_ids.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(sampled_ids, vocab)
                    print('Sample:', sentence)

                    true_ids = captions.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(true_ids, vocab)
                    print('Target:', sentence)

                    print(
                        'Epoch: {} - Step: {} - Train Loss: {} - Eval Loss: {}'
                        .format(epoch, step, losses_train[-1], losses_val[-1]))
                    encoder.batchnorm.train()

                # Save the models
                if (step + 1) % save_step == 0:
                    utils.save_models(encoder, decoder, optimizer, step, epoch,
                                      losses_train, losses_val, checkpoint_dir)
                    utils.dump_losses(
                        losses_train, losses_val,
                        os.path.join(checkpoint_dir, 'losses.pkl'))

    except KeyboardInterrupt:
        pass
    finally:
        # Do final save
        utils.save_models(encoder, decoder, optimizer, step, epoch,
                          losses_train, losses_val, checkpoint_dir)
        utils.dump_losses(losses_train, losses_val,
                          os.path.join(checkpoint_dir, 'losses.pkl'))
示例#11
0
def post_process(oriImg, canvas, paf_avg, all_peak_idxs, nonzero_vals,
                 dont_draw):

    all_peaks = [[] for x in range(N_JOINTS)]

    for i in xrange(all_peak_idxs.shape[0]):
        all_peaks[all_peak_idxs[i, 0]].append(
            (all_peak_idxs[i, 2], all_peak_idxs[i, 1], nonzero_vals[i], i))

    PEAKT = time.time()

    connection_all = []
    special_k = []
    mid_num = 10

    KTTT = time.time()

    # don't really know how this works
    for k in xrange(len(mapIdx)):
        idxs = []
        for x in mapIdx[k]:
            idxs.append(x - 19)

        score_mid = paf_avg[idxs, :, :]
        candA = all_peaks[limbSeq[k][0] - 1]
        candB = all_peaks[limbSeq[k][1] - 1]
        nA = len(candA)
        nB = len(candB)

        indexA, indexB = limbSeq[k]
        if (nA != 0 and nB != 0):
            connection_candidate = []
            for i in range(nA):
                for j in range(nB):
                    vec = np.subtract(candB[j][:2], candA[i][:2])
                    norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                    if norm == 0:
                        continue
                    vec = np.divide(vec, norm)

                    startend = zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
                                   np.linspace(candA[i][1], candB[j][1], num=mid_num))

                    vec_x = np.zeros(len(startend))
                    vec_y = np.zeros(len(startend))

                    for I in xrange(len(startend)):
                        vec_x[I] = score_mid[0,
                                             int(round(startend[I][1])),
                                             int(round(startend[I][0]))]
                        vec_y[I] = score_mid[1,
                                             int(round(startend[I][1])),
                                             int(round(startend[I][0]))]

                    score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(
                        vec_y, vec[1])
                    score_with_dist_prior = np.sum(
                        score_midpts) / len(score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                    criterion1 = len(
                        np.nonzero(score_midpts > param_['thre2'])
                        [0]) > 0.8 * len(score_midpts)
                    criterion2 = score_with_dist_prior > 0
                    if criterion1 and criterion2:
                        connection_candidate.append([
                            i, j, score_with_dist_prior,
                            score_with_dist_prior + candA[i][2] + candB[j][2]
                        ])

            connection_candidate = sorted(connection_candidate,
                                          key=get_third_item,
                                          reverse=True)
            connection = np.zeros((0, 5))
            for c in range(len(connection_candidate)):
                i, j, s = connection_candidate[c][0:3]
                if (i not in connection[:, 3] and j not in connection[:, 4]):
                    connection = np.vstack(
                        [connection, [candA[i][3], candB[j][3], s, i, j]])
                    if (len(connection) >= min(nA, nB)):
                        break

            connection_all.append(connection)
        else:
            special_k.append(k)
            connection_all.append([])

    # last number in each row is the total parts number of that person
    # the second last number in each row is the score of the overall configuration
    subset = -1 * np.ones((0, 20))
    cand_tmp = []
    for sublist in all_peaks:
        for item in sublist:
            cand_tmp.append(item)

    candidate = np.asarray(cand_tmp)

    # don't really know how this works either
    for k in xrange(len(mapIdx)):
        if k not in special_k:
            partAs = connection_all[k][:, 0]
            partBs = connection_all[k][:, 1]
            indexA, indexB = np.array(limbSeq[k]) - 1

            for i in range(len(connection_all[k])):  #= 1:size(temp,1)
                found = 0
                subset_idx = [-1, -1]
                for j in range(len(subset)):  #1:size(subset,1):
                    if subset[j][indexA] == partAs[i] or subset[j][
                            indexB] == partBs[i]:
                        subset_idx[found] = j
                        found += 1

                if found == 1:
                    j = subset_idx[0]
                    if (subset[j][indexB] != partBs[i]):
                        subset[j][indexB] = partBs[i]
                        subset[j][-1] += 1
                        subset[j][-2] += candidate[partBs[i].astype(int),
                                                   2] + connection_all[k][i][2]
                elif found == 2:  # if found 2 and disjoint, merge them
                    j1, j2 = subset_idx
                    #print "found = 2"
                    membership = ((subset[j1] >= 0).astype(int) +
                                  (subset[j2] >= 0).astype(int))[:-2]
                    if len(np.nonzero(membership == 2)[0]) == 0:  #merge
                        subset[j1][:-2] += (subset[j2][:-2] + 1)
                        subset[j1][-2:] += subset[j2][-2:]
                        subset[j1][-2] += connection_all[k][i][2]
                        subset = np.delete(subset, j2, 0)
                    else:  # as like found == 1
                        subset[j1][indexB] = partBs[i]
                        subset[j1][-1] += 1
                        subset[j1][-2] += candidate[
                            partBs[i].astype(int), 2] + connection_all[k][i][2]

                # if find no partA in the subset, create a new subset
                elif not found and k < 17:
                    row = -1 * np.ones(20)
                    row[indexA] = partAs[i]
                    row[indexB] = partBs[i]
                    row[-1] = 2
                    row[-2] = np.sum(
                        candidate[connection_all[k][i, :2].astype(int),
                                  2]) + connection_all[k][i][2]
                    subset = np.vstack([subset, row])

    #print subset.shape
    # delete some rows of subset which has few parts occur
    deleteIdx = []
    for i in range(len(subset)):
        if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
            deleteIdx.append(i)
    subset = np.delete(subset, deleteIdx, axis=0)
    N_BONES = N_JOINTS - 1

    if dont_draw:
        res = {'found_ppl': []}
        for n in range(len(subset)):
            skel = {}
            for i in range(N_BONES):
                index = subset[n][np.array(limbSeq[i]) - 1]
                if -1 in index:
                    continue
                Y = candidate[index.astype(int), 0]
                X = candidate[index.astype(int), 1]
                bone_name = bone_map[i]
                skel[bone_name] = ((X[0], Y[0]), (X[1], Y[1]))
            res['found_ppl'].append(skel)
        return res

    p = time.time()
    canvas = canvas.copy()

    for i in range(N_JOINTS):
        for j in range(len(all_peaks[i])):
            cv2.circle(canvas,
                       all_peaks[i][j][0:2],
                       4,
                       colors[i],
                       thickness=-1)

    stickwidth = 4

    N_BONES = N_JOINTS - 1

    for i in range(N_BONES):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i]) - 1]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly(
                (int(mY), int(mX)), (int(length / 2), stickwidth), int(angle),
                0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            cv2.putText(canvas, str(i), (int(mY), int(mX)),
                        cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 3)
            cv2.putText(canvas, str(i), (int(mY), int(mX)),
                        cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

    print('drawing took: ', time.time() - p)
    return canvas
def handle_one(oriImg):
    
    # for visualize
    canvas = np.copy(oriImg)
    imageToTest = Variable(T.transpose(T.transpose(T.unsqueeze(torch.from_numpy(oriImg).float(),0),2,3),1,2),volatile=True).cuda()
    print oriImg.shape
    scale = model_['boxsize'] / float(oriImg.shape[0])
    print scale
    h = int(oriImg.shape[0]*scale)
    w = int(oriImg.shape[1]*scale)
    pad_h = 0 if (h%model_['stride']==0) else model_['stride'] - (h % model_['stride']) 
    pad_w = 0 if (w%model_['stride']==0) else model_['stride'] - (w % model_['stride'])
    new_h = h+pad_h
    new_w = w+pad_w

    imageToTest = cv2.resize(oriImg, (0,0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_['stride'], model_['padValue'])
    imageToTest_padded = np.transpose(np.float32(imageToTest_padded[:,:,:,np.newaxis]), (3,2,0,1))/256 - 0.5

    feed = Variable(T.from_numpy(imageToTest_padded)).cuda()      

    output1,output2 = model(feed)

    heatmap = nn.UpsamplingBilinear2d((oriImg.shape[0], oriImg.shape[1])).cuda()(output2)

    paf = nn.UpsamplingBilinear2d((oriImg.shape[0], oriImg.shape[1])).cuda()(output1)       
    
    print heatmap.size()
    print paf.size()
    print type(heatmap)
    heatmap_avg = T.transpose(T.transpose(heatmap[0],0,1),1,2).data.cpu().numpy()
    paf_avg = T.transpose(T.transpose(paf[0],0,1),1,2).data.cpu().numpy()
        
    all_peaks = []
    peak_counter = 0

    #maps = 
    for part in range(18):
        map_ori = heatmap_avg[:,:,part]
        map = gaussian_filter(map_ori, sigma=3)
        
        map_left = np.zeros(map.shape)
        map_left[1:,:] = map[:-1,:]
        map_right = np.zeros(map.shape)
        map_right[:-1,:] = map[1:,:]
        map_up = np.zeros(map.shape)
        map_up[:,1:] = map[:,:-1]
        map_down = np.zeros(map.shape)
        map_down[:,:-1] = map[:,1:]
        
        peaks_binary = np.logical_and.reduce((map>=map_left, map>=map_right, map>=map_up, map>=map_down, map > param_['thre1']))
    #    peaks_binary = T.eq(
    #    peaks = zip(T.nonzero(peaks_binary)[0],T.nonzero(peaks_binary)[0])
        
        peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse
        
        peaks_with_score = [x + (map_ori[x[1],x[0]],) for x in peaks]
        id = range(peak_counter, peak_counter + len(peaks))
        peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]

        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)
        
        
        
        
        
    connection_all = []
    special_k = []
    mid_num = 10

    for k in range(len(mapIdx)):
        score_mid = paf_avg[:,:,[x-19 for x in mapIdx[k]]]
        candA = all_peaks[limbSeq[k][0]-1]
        candB = all_peaks[limbSeq[k][1]-1]
        nA = len(candA)
        nB = len(candB)
        indexA, indexB = limbSeq[k]
        if(nA != 0 and nB != 0):
            connection_candidate = []
            for i in range(nA):
                for j in range(nB):
                    vec = np.subtract(candB[j][:2], candA[i][:2])
                    norm = math.sqrt(vec[0]*vec[0] + vec[1]*vec[1])
                    vec = np.divide(vec, norm)
                    
                    startend = zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
                                   np.linspace(candA[i][1], candB[j][1], num=mid_num))
                    
                    vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
                                      for I in range(len(startend))])
                    vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
                                      for I in range(len(startend))])

                    score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                    score_with_dist_prior = sum(score_midpts)/len(score_midpts) + min(0.5*oriImg.shape[0]/norm-1, 0)
                    criterion1 = len(np.nonzero(score_midpts > param_['thre2'])[0]) > 0.8 * len(score_midpts)
                    criterion2 = score_with_dist_prior > 0
                    if criterion1 and criterion2:
                        connection_candidate.append([i, j, score_with_dist_prior, score_with_dist_prior+candA[i][2]+candB[j][2]])

            connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
            connection = np.zeros((0,5))
            for c in range(len(connection_candidate)):
                i,j,s = connection_candidate[c][0:3]
                if(i not in connection[:,3] and j not in connection[:,4]):
                    connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                    if(len(connection) >= min(nA, nB)):
                        break

            connection_all.append(connection)
        else:
            special_k.append(k)
            connection_all.append([])

    # last number in each row is the total parts number of that person
    # the second last number in each row is the score of the overall configuration
    subset = -1 * np.ones((0, 20))
    candidate = np.array([item for sublist in all_peaks for item in sublist])

    for k in range(len(mapIdx)):
        if k not in special_k:
            partAs = connection_all[k][:,0]
            partBs = connection_all[k][:,1]
            indexA, indexB = np.array(limbSeq[k]) - 1

            for i in range(len(connection_all[k])): #= 1:size(temp,1)
                found = 0
                subset_idx = [-1, -1]
                for j in range(len(subset)): #1:size(subset,1):
                    if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                        subset_idx[found] = j
                        found += 1
                
                if found == 1:
                    j = subset_idx[0]
                    if(subset[j][indexB] != partBs[i]):
                        subset[j][indexB] = partBs[i]
                        subset[j][-1] += 1
                        subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                elif found == 2: # if found 2 and disjoint, merge them
                    j1, j2 = subset_idx
                    print "found = 2"
                    membership = ((subset[j1]>=0).astype(int) + (subset[j2]>=0).astype(int))[:-2]
                    if len(np.nonzero(membership == 2)[0]) == 0: #merge
                        subset[j1][:-2] += (subset[j2][:-2] + 1)
                        subset[j1][-2:] += subset[j2][-2:]
                        subset[j1][-2] += connection_all[k][i][2]
                        subset = np.delete(subset, j2, 0)
                    else: # as like found == 1
                        subset[j1][indexB] = partBs[i]
                        subset[j1][-1] += 1
                        subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                # if find no partA in the subset, create a new subset
                elif not found and k < 17:
                    row = -1 * np.ones(20)
                    row[indexA] = partAs[i]
                    row[indexB] = partBs[i]
                    row[-1] = 2
                    row[-2] = sum(candidate[connection_all[k][i,:2].astype(int), 2]) + connection_all[k][i][2]
                    subset = np.vstack([subset, row])

    # delete some rows of subset which has few parts occur
    deleteIdx = [];
    for i in range(len(subset)):
        if subset[i][-1] < 4 or subset[i][-2]/subset[i][-1] < 0.4:
            deleteIdx.append(i)
    subset = np.delete(subset, deleteIdx, axis=0)

#    canvas = cv2.imread(test_image) # B,G,R order
    for i in range(18):
        for j in range(len(all_peaks[i])):
            cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1)

    stickwidth = 4

    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i])-1]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mY),int(mX)), (int(length/2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

    return canvas
canvas = cv2.imread(test_image) # B,G,R order
for i in range(18):
    for j in range(len(all_peaks[i])):
        cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1)

stickwidth = 4

for i in range(17):
    for n in range(len(subset)):
        index = subset[n][np.array(limbSeq[i])-1]
        if -1 in index:
            continue
        cur_canvas = canvas.copy()
        Y = candidate[index.astype(int), 0]
        X = candidate[index.astype(int), 1]
        mX = np.mean(X)
        mY = np.mean(Y)
        length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
        polygon = cv2.ellipse2Poly((int(mY),int(mX)), (int(length/2), stickwidth), int(angle), 0, 360, 1)
        cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

#Parallel(n_jobs=1)(delayed(handle_one)(i) for i in range(18))

toc =time.time()
print 'time is %.5f'%(toc-tic)     
cv2.imwrite('result.png',canvas)