def select_gaps(argv):
    pjoin = os.path.join

    parser = argparse.ArgumentParser("Select gaps")
    parser = parser_video(parser)
    parser.add_argument('--select-gaps', action='store_true')
    parser.add_argument("-s", "--d-scenelets", dest='s',
                        type=argparse_check_exists,
                        help="Folder containing original PiGraphs scenelets")
    args = parser.parse_args(argv)
    d = os.path.join(args.video, args.d)
    assert os.path.exists(d), "does not exist: %s" % d

    # parse video path
    if os.path.isdir(args.video):
        if args.video.endswith(os.sep):
            args.video = args.video[:-1]
    name_query = args.video.split(os.sep)[-1]
    assert len(name_query), args.video.split(os.sep)
    p_query = pjoin(args.video, "skel_%s_unannot.json" % name_query) \
        if os.path.isdir(args.video) else args.video
    assert p_query.endswith('.json'), "Need a skeleton file"

    # load initial video path (local poses)
    query = Scenelet.load(p_query, no_obj=True)
    frame_ids = query.skeleton.get_frames()
    centroids = Skeleton.get_resampled_centroids(
        start=frame_ids[0], end=frame_ids[-1],
        old_frame_ids=frame_ids, poses=query.skeleton.poses)

    depths_times_charnesses = []
    skeleton = Skeleton()
    depths = []
    for p in sorted(os.listdir(d)):
        ch = float(p.split('charness')[1][1:])
        d_time = pjoin(d, p)
        p_skel = next(
            f for f in os.listdir(d_time)
            if os.path.isfile(pjoin(d_time, f)) and f.startswith('skel')
            and f.endswith('json') and '_00' in f)
        sclt = Scenelet.load(pjoin(d_time, p_skel))
        mn, mx = sclt.skeleton.get_frames_min_max()
        frame_id = mn + (mx - mn) // 2
        if query.skeleton.has_pose(frame_id):
            pos_3d = query.skeleton.get_centroid_3d(frame_id)
        else:
            lin_id = frame_id - frame_ids[0]
            pos_3d = centroids[lin_id, :]
        depth = np.linalg.norm(pos_3d)
        depths.append(depth)
        depths_times_charnesses.append(
            DepthTimeCharness(depth=depth, frame_id=frame_id, charness=ch))

    hist, bin_edges = np.histogram(depths, bins=5)
    lg.debug("hist: %s" % hist)
    lg.debug("edges: %s" % bin_edges)
def get_pose_distance(query3d, path_match, gap):
    skel_q = query3d.skeleton
    mid_frame = gap[0] + (gap[1] - gap[0]) // 2
    frame_q = min((frame_id for frame_id in skel_q.get_frames()),
                  key=lambda frame_id: abs(frame_id - mid_frame))
    time = skel_q.get_time(frame_id=frame_q)
    print('Closest to {} is {} with time {}'.format(mid_frame, frame_q, time))
    match = Scenelet.load(path_match)
    skel_m = match.skeleton
    frame_m = skel_m.find_time(time=time)
    time_m = skel_m.get_time(frame_id=frame_m)
    print('Closest match is {} with time {}'.format(frame_m, time_m))

    diffs = []
    for frame_id in range(frame_q - 1, frame_q + 2):
        if not skel_q.has_pose(frame_id):
            print('Skipping frame_id {} in query because missing'.format(
                frame_id))
            continue
        _time_q = skel_q.get_time(frame_id=frame_id)
        _frame_m = skel_m.find_time(time=_time_q)
        _time_m = skel_m.get_time(frame_id=_frame_m)
        if abs(_time_m - _time_q) > 1.:
            print('Skipping matched time of {} because too far from {}'
                  .format(_time_m, _time_q))
            continue
        pose_q = skel_q.get_pose(frame_id=frame_id)
        pose_m = skel_m.get_pose(frame_id=_frame_m)
        diff = get_pose_diff(pose_q=pose_q, pose_m=pose_m)
        print('Diff: {}'.format(diff))
        diffs.append(diff)

    # np.max would raise on an empty list, so signal invalidity explicitly
    if not len(diffs):
        return np.inf, False
    # return np.mean(diffs), len(diffs) > 0
    return np.max(diffs), True
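# The matching above pairs frames by wall-clock time rather than frame index.
# A minimal, self-contained sketch of that idea in plain numpy (timestamps
# are made up; Skeleton.find_time presumably performs the same
# nearest-neighbour lookup):
def _demo_nearest_time_match():
    import numpy as np
    times_q = np.array([0.0, 0.4, 0.8, 1.2])       # query timestamps
    times_m = np.array([0.1, 0.5, 0.9, 1.3, 1.7])  # match timestamps
    for t_q in times_q:
        frame_m = int(np.argmin(np.abs(times_m - t_q)))
        # skip pairs whose timestamps are too far apart, as above (> 1 s)
        if abs(times_m[frame_m] - t_q) <= 1.:
            print('query t=%.1f -> match frame %d (t=%.1f)'
                  % (t_q, frame_m, times_m[frame_m]))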
def ipol_ik(p_scenelet, postfix):
    """Uses Blender's IK engine to interpolate 3D joint positions in time.

    Sphere positions are tied to rig endpoints, and these endpoints will be
    as close as possible to the noisy targets. Hence the Sphere positions
    are the ones we save as the interpolated 3D joint positions.

    The method will try to preserve all non-skeleton-related information
    in the scenelet by only overwriting its skeleton. Visibility and
    confidence will be discarded (TODO:?).

    Args:
        p_scenelet (str):
            Path to scenelet containing the skeleton to interpolate.
        postfix (str):
            Tag to append to the skeleton name before the json extension.
    Returns:
        o_path (str): Path where the interpolated version was saved.
    """
    # Load skeleton.
    assert os.path.exists(p_scenelet), "Does not exist: %s" % p_scenelet
    scenelet = Scenelet.load(p_scenelet)

    # Prepare animation time.
    bpy.context.scene.render.fps = 10
    bpy.context.scene.render.fps_base = 1

    # Call rig importer.
    import_skeleton_animation(skeleton=scenelet.skeleton,
                              name='Output',
                              add_camera=False,
                              add_trajectory=False,
                              time_scale=0.1,
                              skeleton_transparency=0)

    # Move head back to top (2D marker is at tip of head, but visually
    # it looks better if it's rendered with the center of the head around
    # the nose).
    for obj in bpy.data.objects:
        if obj.name.endswith("HEAD.Sphere"):
            cnstr = next(c for c in obj.constraints
                         if c.type == 'COPY_LOCATION')
            cnstr.head_tail = 1.

    # Extract Sphere endpoints as the new skeleton positions.
    scenelet.skeleton = extract_skeleton(scene=bpy.context.scene)

    # Save to disk.
    stem, ext = os.path.splitext(p_scenelet)
    o_path = "%s_%s.json" % (stem, postfix)
    scenelet.save(o_path, save_obj=True)

    # Return path.
    return o_path
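# Hypothetical invocation of ipol_ik; it has to run inside Blender's Python
# (bpy is only available there), e.g.:
#   blender --background --python this_script.py
# The path below is illustrative only:
#   p_ikipol = ipol_ik('video/skel_lobby15_unannot.json', postfix='ikipol')
#   sclt = Scenelet.load(p_ikipol)  # same scenelet, interpolated skeleton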
def compare_other_to_gt():
    scene = bpy.context.scene
    assert 'postfix' in globals(), "Need postfix parameter"
    name_scene = bpy.path.abspath('//').split(os.sep)[-3]
    # NOTE: reading the module-level variable explicitly avoids an
    # UnboundLocalError when the global already exists.
    if 'object_correspondences' not in globals():
        object_correspondences = get_correspondences(name_scene, postfix)
    else:
        object_correspondences = globals()['object_correspondences']

    p_gt = bpy.path.abspath("//../quant/skel_gt.json")
    sclt_gt = Scenelet.load(p_gt)

    p_other = bpy.path.abspath("//../quant/skel_%s.json" % postfix)
    # NOTE: the "True or" forces re-saving the annotated scenelet each run.
    if True or not os.path.exists(p_other):
        frame_ids = find_keyframes(lambda ob: ob.name.startswith('Output.'))
        sclt_other = save_annotated_scenelet(sc_path=p_other, scene=scene,
                                             frame_ids=frame_ids)
    else:
        sclt_other = Scenelet.load(p_other)

    compare_scenes(sclt_other, sclt_gt, object_correspondences)
def parse_charness_histograms(dmat):
    assert 5 == len(dmat['pigraph_histogram_charness'].shape), \
        "Not 5D? %s" % repr(dmat['pigraph_histogram_charness'].shape)
    bins = np.transpose(dmat['pigraph_histogram_charness'], (4, 3, 0, 1, 2))
    names_scenelets = dmat['pigraph_scenelet_names']
    if 'pigraph_histogram_params' in dmat:
        hists = dict(
            (Scenelet.to_old_name(name_scenelet[0][0]),
             SquareHistogram.from_mat(
                 params=dmat['pigraph_histogram_params'],
                 bins=bins[id_scenelet, :, :, :],
                 categories=dmat['categories']))
            for id_scenelet, name_scenelet in enumerate(names_scenelets))
    else:
        hists = dict(
            (Scenelet.to_old_name(name_scenelet[0][0]),
             RadialHistogram.from_mat(
                 angular_edges=dmat['angular_edges'],
                 radial_edges=dmat['radial_edges'],
                 bins=bins[id_scenelet, :, :, :],
                 categories=dmat['categories']))
            for id_scenelet, name_scenelet in enumerate(names_scenelets))
    return hists
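# A minimal sketch of feeding parse_charness_histograms, assuming the .mat
# file was exported with the keys referenced above (the file name is
# illustrative only):
def _demo_load_charness_histograms():
    from scipy.io import loadmat
    dmat = loadmat('pigraphs_charness.mat')
    hists = parse_charness_histograms(dmat)
    print('parsed %d scenelet histograms' % len(hists))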
def read_scenelets(d_scenelets, limit=0):
    pjoin = os.path.join
    # get full PiGraphs scene folders
    p_scenelets = [
        pjoin(d_scenelets, d)
        for d in os.listdir(d_scenelets)
        if os.path.isdir(pjoin(d_scenelets, d))
    ]
    # collect the skeleton json files inside them
    p_scenelets = [
        pjoin(d, f)
        for d in p_scenelets
        for f in os.listdir(d)
        if f.startswith('skel') and f.endswith('.json')
    ]

    out = []
    for p_scenelet in p_scenelets:
        lg.info("Reading %s" % p_scenelet)
        out.append(Scenelet.load(p_scenelet))
        if limit != 0 and len(out) >= limit:
            break
    return out
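# Example usage (folder layout assumed: one subdirectory per scene, each
# holding skel_*.json files; the path is illustrative):
#   scenelets = read_scenelets('data/pigraphs_scenelets', limit=10)
#   print(len(scenelets))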
if __name__ == 'add_constraint_to_spheres':
    add_constraint_to_spheres()
elif __name__ == 'save_annotated_scenelet':
    assert 'postfix' in globals(), "Need postfix parameter"
    print("abspath: %s" % bpy.path.abspath('//'))
    name_scene = bpy.path.abspath('//').split(os.sep)[-3]
    scene = bpy.context.scene
    # Aron commented out in Jan2019
    if 'object_correspondences' not in globals():
        object_correspondences = get_correspondences(name_scene)

    p_file = bpy.path.abspath('//../output/skel_output.json')
    assert os.path.exists(p_file), "Does not exist: %s" % p_file
    sclt_ours = Scenelet.load(p_file)
    p_out = bpy.path.abspath('//../quant/skel_output.json')
    sclt_ours.save(p_out, save_obj=True)

    p_gt = bpy.path.abspath("//../quant/skel_%s.json" % postfix)
    if not os.path.exists(p_gt):
        if 'frame_ids' not in globals():
            frame_ids = get_frame_ids(name_scene)
        # frame_ids = find_keyframes(
        #     lambda ob: ob.name.startswith('Output.'))
        sclt_gt = save_annotated_scenelet(sc_path=p_gt,
                                          scene=scene,
                                          frame_ids=frame_ids,
                                          frame_multiplier=frame_multiplier,
                                          time_multiplier=time_multiplier)
    else:
        print("NOT OVERWRITING GT")
def optimize_path(skel_ours, skel_ours_2d, images, intrinsics, path_skel,
                  ground_rot, shape_orig=None, use_huber=False,
                  weight_smooth=0.01, show=False, frames_ignore=None,
                  resample=True, depth_init=10., p_constraints=None,
                  smooth_mode=SmoothMode.ACCEL):
    """Optimize the 3D path so that it matches the corresponding 2D
    observations.

    Args:
        skel_ours (Skeleton): 3D skeleton from LFD.
        skel_ours_2d (Skeleton): 2D feature points from LFD.
        images (dict): Color images for debug, keyed by frame_ids.
        intrinsics (np.ndarray): 3x3 camera intrinsics matrix.
        path_skel (str): Path of input file from LFD on disk, used to
            create paths for intermediate results.
        ground_rot (tuple): Ground rotation in degrees (Euler XYZ); only
            the rotation around x is expected to be non-zero.
        shape_orig (tuple): Height and width of original images before
            LFD scaled them.
        use_huber (bool): Deprecated.
        weight_smooth (float): Smoothness term weight.
        show (bool): Show debug visualizations.
        frames_ignore (set): Deprecated.
        resample (bool): Fill in missing poses by interpolating using
            Blender's IK.
        depth_init (float): Initial depth for LFD poses.
        p_constraints (str): Path to 3D constraints scenelet file.
        smooth_mode (SmoothMode): Smooth velocity or acceleration.
    """
    # scale 2D detections to canonical camera coordinates
    np_poses_2d = \
        skel_ours_2d.poses[:, :2, :] \
        - np.expand_dims(intrinsics[:2, 2], axis=1)
    np_poses_2d[:, 0, :] /= intrinsics[0, 0]
    np_poses_2d[:, 1, :] /= intrinsics[1, 1]

    n_frames = skel_ours.poses.shape[0]
    np_translation = np.zeros(shape=(n_frames, 3), dtype=np.float32)
    np_translation[:, 1] = -1.
    np_translation[:, 2] = \
        np.random.uniform(-depth_init * 0.25, depth_init * 0.25,
                          np_translation.shape[0]) \
        + depth_init
    np_rotation = np.zeros(shape=(n_frames, 3), dtype=np.float32)
    frame_ids = np.array(skel_ours.get_frames(), dtype=np.float32)
    np_visibility = skel_ours_2d.get_confidence_matrix(frame_ids=frame_ids,
                                                       dtype='f4')

    if p_constraints is not None:
        sclt_cnstr = Scenelet.load(p_constraints)
        np_cnstr_mask = np.zeros(
            shape=(len(frame_ids), Joint.get_num_joints()),
            dtype=np.float32)
        np_cnstr = np.zeros(
            shape=(len(frame_ids), 3, Joint.get_num_joints()),
            dtype=np.float32)
        for frame_id, confs in sclt_cnstr.confidence.items():
            lin_id = None
            for j, conf in confs.items():
                if conf > 0.5:
                    if lin_id is None:
                        lin_id = next(
                            lin_id_
                            for lin_id_, frame_id_ in enumerate(frame_ids)
                            if frame_id_ == frame_id)
                    np_cnstr_mask[lin_id, j] = conf
                    np_cnstr[lin_id, :, j] = \
                        sclt_cnstr.skeleton.get_joint_3d(joint_id=j,
                                                         frame_id=frame_id)
    else:
        np_cnstr_mask = None
        np_cnstr = None

    spans = skel_ours.get_actor_empty_frames()
    dt = frame_ids[1:].astype(np.float32) \
        - frame_ids[:-1].astype(np.float32)
    dt_pos_inv = np.reciprocal(dt, dtype=np.float32)
    dt_vel_inv = np.divide(np.float32(2.), dt[1:] + dt[:-1])
    # ensure smoothness weight multipliers are not affected by
    # actor-transitions
    if skel_ours.n_actors > 1 and len(spans):
        for lin_id in range(len(dt)):
            frame_id0 = frame_ids[lin_id]
            frame_id1 = frame_ids[lin_id + 1]
            span = next((span_ for span_ in spans
                         if span_[0] == frame_id0), None)
            if span is not None:
                assert frame_id1 == span[1], "No"
                dt[lin_id] = 0.
                dt_pos_inv[lin_id] = 0.
                dt_vel_inv[lin_id] = 0.
                dt_vel_inv[lin_id - 1] = 1. / dt[lin_id - 1]

    forwards = np.array([
        skel_ours.get_forward(frame_id, estimate_ok=True, k=0)
        for frame_id in skel_ours.get_frames()
    ])
    # from alignment import get_angle
    # xs = np.hstack((
    #     np.ones(shape=(len(forwards), 1)),
    #     np.zeros(shape=(len(forwards), 2))
    # ))
    # print(xs.shape)
    print(forwards.shape)
    unit_x = np.array((1., 0., 0.))
    np_angles = [-np.arctan2(forward[2], forward[0])
                 for forward in forwards]
    print(forwards, np_angles)

    # (commented-out ankle-difference weighting kept for reference)
    # ank_diff = \
    #     np.exp(
    #         -2. * np.max(
    #             [
    #                 np.linalg.norm(
    #                     (skel_ours.poses[1:, :, joint]
    #                      - skel_ours.poses[:-1, :, joint]).T
    #                     * dt_pos_inv, axis=0
    #                 ).astype(np.float32)
    #                 for joint in {Joint.LANK, Joint.RANK}
    #             ],
    #             axis=0
    #         )
    #     )
    # assert ank_diff.shape == (skel_ours.poses.shape[0] - 1,), \
    #     "Wrong shape: %s" % repr(ank_diff.shape)

    # cam_angle = [np.deg2rad(-8.)]
    assert np.isclose(ground_rot[1], 0.) and np.isclose(ground_rot[2], 0.), \
        "Assumed only x rotation"
    # assert ground_rot[0] <= 0, \
    #     "Negative means looking down, why looking up?"
    cam_angle = [np.deg2rad(ground_rot[0])]
    # assert False, "Fixed angle!"

    device_name = '/gpu:0' if tf.test.is_gpu_available() else '/cpu:0'
    devices = {device_name}
    for device in devices:
        with Timer(device, verbose=True):
            graph = tf.Graph()
            with graph.as_default(), tf.device(device):
                tf_visibility = tf.Variable(
                    np.tile(np_visibility, (1, 2, 1)),
                    name='visibility', trainable=False, dtype=tf.float32)
                tf_dt_pos_inv = \
                    tf.Variable(np.tile(dt_pos_inv, (1, 3)).reshape(-1, 3),
                                name='dt_pos_inv', trainable=False,
                                dtype=tf.float32)
                tf_dt_vel_inv = \
                    tf.constant(np.tile(dt_vel_inv, (1, 3)).reshape(-1, 3),
                                name='dt_vel_inv', dtype=tf.float32)

                # input data
                pos_3d_in = tf.Variable(skel_ours.poses.astype(np.float32),
                                        trainable=False, name='pos_3d_in',
                                        dtype=tf.float32)
                pos_2d_in = tf.Variable(np_poses_2d.astype(np.float32),
                                        trainable=False, name='pos_2d_in',
                                        dtype=tf.float32)

                params_camera = tf.Variable(initial_value=cam_angle,
                                            dtype=tf.float32,
                                            trainable=True)
                cam_sn = tf.sin(params_camera)
                cam_cs = tf.cos(params_camera)
                transform_camera = tf.reshape(
                    tf.stack([1., 0., 0., 0.,
                              0., cam_cs[0], cam_sn[0], 0.,
                              0., -cam_sn[0], cam_cs[0], 0.,
                              0., 0., 0., 1.], axis=0),
                    shape=(4, 4))

                # 3D translation
                translation = tf.Variable(np_translation,
                                          name='translation')
                # 3D rotation (Euler XYZ)
                rotation = tf.Variable(np_rotation, name='rotation')
                fw_angles = tf.Variable(np_angles, name='angles')

                # rotation around y
                my_zeros = tf.zeros((n_frames, 1))
                my_ones = tf.ones((n_frames, 1))
                c = tf.cos(tf.slice(rotation, [0, 1], [n_frames, 1]))
                s = tf.sin(tf.slice(rotation, [0, 1], [n_frames, 1]))
                t0 = tf.concat([c, my_zeros, -s, my_zeros], axis=1)
                t1 = tf.concat([my_zeros, my_ones, my_zeros, my_zeros],
                               axis=1)
                t2 = tf.concat([s, my_zeros, c, my_zeros], axis=1)
                t3 = tf.concat([my_zeros, my_zeros, my_zeros, my_ones],
                               axis=1)
                transform = tf.stack([t0, t1, t2, t3], axis=2,
                                     name="transform")
                transform = tf.einsum('ij,ajk->aik', transform_camera,
                                      transform)[:, :3, :3]
                # transform to 3d
                pos_3d = tf.matmul(transform, pos_3d_in) \
                    + tf.tile(tf.expand_dims(translation, 2),
                              [1, 1, int(pos_3d_in.shape[2])])

                # constraints
                loss_cnstr = None
                if np_cnstr is not None:
                    constraints = tf.Variable(np_cnstr, trainable=False,
                                              name='constraints',
                                              dtype=tf.float32)
                    constraints_mask = tf.Variable(
                        np_cnstr_mask, trainable=False,
                        name='constraints_mask', dtype=tf.float32)
                    cnstr_diff = tf.reduce_sum(
                        tf.squared_difference(pos_3d, constraints),
                        axis=1, name='constraints_difference')
                    cnstr_diff_masked = tf.multiply(
                        constraints_mask, cnstr_diff,
                        name='constraints_difference_masked')
                    loss_cnstr = tf.reduce_sum(cnstr_diff_masked,
                                               name='constraints_loss')

                # perspective divide
                pos_2d = tf.divide(
                    tf.slice(pos_3d, [0, 0, 0], [n_frames, 2, -1]),
                    tf.slice(pos_3d, [0, 2, 0], [n_frames, 1, -1]))

                if use_huber:
                    diff = huber_loss(pos_2d_in, pos_2d, 1.)
                    masked = diff * tf_visibility
                    loss_reproj = tf.nn.l2_loss(masked)
                    lg.info("Doing huber on reprojection, NOT translation")
                else:
                    # re-projection loss
                    diff = pos_2d - pos_2d_in
                    # mask loss by 2d key-point visibility
                    masked = diff * tf_visibility
                    loss_reproj = tf.nn.l2_loss(masked)
                    lg.info("NOT doing huber")
                sys.stderr.write(
                    "TODO: Move huber to translation, not reconstruction\n")

                # translation smoothness
                dx = tf.multiply(
                    x=0.5,
                    y=tf.add(
                        pos_3d[1:, :, Joint.LHIP]
                        - pos_3d[:-1, :, Joint.LHIP],
                        pos_3d[1:, :, Joint.RHIP]
                        - pos_3d[:-1, :, Joint.RHIP]),
                    name="average_hip_displacement_3d")
                tf_velocity = tf.multiply(dx, tf_dt_pos_inv)
                tf_acceleration_z = tf.multiply(
                    x=dx[1:, 2:3] - dx[:-1, 2:3],
                    y=tf_dt_vel_inv[:, 2:3],
                    name="acceleration_z")

                if smooth_mode == SmoothMode.VELOCITY:
                    # if GT, use full smoothness to fix 2-frame flicker
                    if np_cnstr is not None:
                        print('Smoothing all velocity!')
                        loss_transl_smooth = \
                            weight_smooth * tf.nn.l2_loss(tf_velocity)
                    else:  # normal mode, don't over-smooth screen space
                        loss_transl_smooth = \
                            weight_smooth \
                            * tf.nn.l2_loss(tf_velocity[:, 2:3])
                elif smooth_mode == SmoothMode.ACCEL:
                    loss_transl_smooth = \
                        weight_smooth * tf.nn.l2_loss(tf_acceleration_z)
                else:
                    raise RuntimeError(
                        'Unknown smooth mode: {}'.format(smooth_mode))

                if show:
                    sqr_accel_z = weight_smooth \
                        * tf.square(tf_acceleration_z)
                if weight_smooth > 0.:
                    lg.info("Smoothing in time!")
                    loss = loss_reproj + loss_transl_smooth
                else:
                    lg.warning("Not smoothing!")
                    loss = loss_reproj

                if loss_cnstr is not None:
                    loss += 1000 * loss_cnstr

                # (commented-out hip-angle loss kept for reference)
                # hip0 = tf.nn.l2_normalize(pos_3d[:-1, :, Joint.RHIP]
                #                           - pos_3d[:-1, :, Joint.LHIP])
                # hip1 = tf.nn.l2_normalize(pos_3d[1:, :, Joint.RHIP]
                #                           - pos_3d[1:, :, Joint.RHIP])
                # dots = tf.reduce_sum(tf.multiply(hip0, hip1), axis=1)
                # print(dots)
                # loss_dot = tf.nn.l2_loss(1. - dots)
                # loss_ang = fw_angles + rotation[:, 1]
                # print(loss_ang)
                # loss_ang = tf.square(loss_ang[1:] - loss_ang[:-1])
                # print(loss_ang)
                # two_pi_sqr = tf.constant((2. * 3.14159) ** 2.,
                #                          dtype=tf.float32)
                # print(two_pi_sqr)
                # loss_ang = tf.reduce_mean(
                #     tf.where(loss_ang > two_pi_sqr,
                #              loss_ang - two_pi_sqr, loss_ang))
                # print(loss_ang)
                # loss += loss_ang

                #
                # optimize
                #
                optimizer = ScipyOptimizerInterface(
                    loss, var_list=[translation, rotation],
                    options={'gtol': 1e-12},
                    var_to_bounds={rotation: (-np.pi / 2., np.pi / 2.)})

            with tf.Session(graph=graph) as session:
                session.run(tf.global_variables_initializer())
                optimizer.minimize(session)
                np_pos_3d_out, np_pos_2d_out, np_transl_out, np_masked, \
                    np_acceleration, np_loss_transl_smooth, np_dt_vel = \
                    session.run([pos_3d, pos_2d, translation, masked,
                                 tf_acceleration_z, loss_transl_smooth,
                                 tf_dt_vel_inv])
                if show:
                    o_sqr_accel_z = session.run(sqr_accel_z)
                    o_vel = session.run(tf_velocity)
                    o_dx = session.run(dx)
                    o_rot = session.run(rotation)
                    # o_dx, o_dx2 = session.run([accel_bak, acceleration2])
                    # assert np.allclose(o_dx, o_dx2), "no"
                    o_cam = session.run(fetches=[params_camera])
                    print("camera angle: %s" % np.rad2deg(o_cam[0]))
                    # o_losses = session.run(
                    #     [loss_reproj, loss_transl_smooth,
                    #      loss_dot, loss_ang])
                    o_losses = session.run([loss_reproj,
                                            loss_transl_smooth])
                    print('losses: {}'.format(o_losses))
                    # o_dots = session.run(dots)
                    # with open('tmp/dots.txt', 'w') as fout:
                    #     fout.write('\n'.join(
                    #         (str(e) for e in o_dots.tolist())))

    fixed_frames = []
    # for lin_frame_id in range(np_transl_out.shape[0]):
    #     if np_transl_out[lin_frame_id, 2] < 0.:
    #         print("Correcting frame_id %d: %s"
    #               % (skel_ours.get_lin_id_for_frame_id(lin_frame_id),
    #                  np_transl_out[lin_frame_id, :]))
    #         if lin_frame_id > 0:
    #             np_transl_out[lin_frame_id, :] = \
    #                 np_transl_out[lin_frame_id - 1, :]
    #         else:
    #             np_transl_out[lin_frame_id, :] = \
    #                 np_transl_out[lin_frame_id + 1, :]
    #         fixed_frames.append(lin_frame_id)
    # debug_forwards(skel_ours.poses, np_pos_3d_out, o_rot, forwards,
    #                np_angles)
    # z_jumps = np_pos_3d_out[1:, 2, Joint.PELV] \
    #     - np_pos_3d_out[:-1, 2, Joint.PELV]
    # out = scipy.stats.mstats.winsorize(z_jumps, limits=1.)
    # plt.figure()
    # plt.plot(pos_3d[:, 2, Joint.PELV])
    # plt.show()
    # sys.exit(0)
    # diff = np.linalg.norm(out - displ, axis=1)
    if len(fixed_frames):
        print("Re-optimizing...")
        with tf.Session(graph=graph) as session:
            np_pos_3d_out, np_pos_2d_out, np_transl_out = \
                session.run(fetches=[pos_3d, pos_2d, translation],
                            feed_dict={transform: np_transl_out})

    if show:
        lim_fr = [105, 115, 135]
        fig = plt.figure()
        accel_thr = 0.  # np.percentile(o_sqr_accel_z, 25)

        ax = plt.subplot2grid((2, 2), (0, 0), colspan=2)
        # print("np_masked:%s" % np_masked)
        # plt.plot(np_masked[:, )
        ax.plot(np.linalg.norm(np_acceleration[lim_fr[0]:lim_fr[1]],
                               axis=1),
                '--o', label='accel')
        ax.add_artist(Line2D([0, len(o_sqr_accel_z)],
                             [accel_thr, accel_thr]))
        # plt.plot(np_dt_vel[:, 0], label='dt velocity')
        # plt.plot(np.linalg.norm(np_f_accel, axis=1), '--x',
        #          label='f_accel')
        # plt.plot(ank_diff, label='ank_diff')
        ax.plot(o_sqr_accel_z[lim_fr[0]:lim_fr[1] + 1], '--x',
                label='loss accel_z')
        ax.legend()

        ax2 = plt.subplot2grid((2, 2), (1, 0), aspect='equal')
        ax2.plot(np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 0, Joint.PELV],
                 np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 2, Joint.PELV],
                 '--x')
        for i, vel in enumerate(o_vel):
            if not (lim_fr[0] <= i <= lim_fr[1]):
                continue
            p0 = np_pos_3d_out[i + 1, [0, 2], Joint.PELV]
            p1 = np_pos_3d_out[i, [0, 2], Joint.PELV]
            ax2.annotate(
                "%f = ((%g - %g) + (%g - %g)) * %g = %g"
                % (vel[2],
                   np_pos_3d_out[i + 1, 2, Joint.LHIP],
                   np_pos_3d_out[i, 2, Joint.LHIP],
                   np_pos_3d_out[i + 1, 2, Joint.RHIP],
                   np_pos_3d_out[i, 2, Joint.RHIP],
                   np_dt_vel[i, 2], o_dx[i, 2]),
                xy=((p0[0] + p1[0]) / 2., (p0[1] + p1[1]) / 2.))
        ax2.set_title('velocities')

        ax1 = plt.subplot2grid((2, 2), (1, 1), aspect='equal')
        ax1.plot(np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 0, Joint.PELV],
                 np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 2, Joint.PELV],
                 '--x')
        for i, lacc in enumerate(o_sqr_accel_z):
            if not (lim_fr[0] <= i <= lim_fr[1]):
                continue
            if lacc > accel_thr:
                p0 = np_pos_3d_out[i + 1, [0, 2], Joint.PELV]
                ax1.annotate("%.3f" % np_acceleration[i],
                             xy=(p0[0], p0[1]))
                ax.annotate("%.3f" % np.log10(lacc),
                            xy=(i - lim_fr[0], abs(np_acceleration[i])))
        ax1.set_title('accelerations')
        plt.show()

    np.set_printoptions(linewidth=200)

    # back from normalized camera coordinates to pixels
    np_pos_2d_out[:, 0, :] *= intrinsics[0, 0]
    np_pos_2d_out[:, 1, :] *= intrinsics[1, 1]
    np_pos_2d_out[:, 0, :] += intrinsics[0, 2]
    np_pos_2d_out[:, 1, :] += intrinsics[1, 2]

    np_poses_2d[:, 0, :] *= intrinsics[0, 0]
    np_poses_2d[:, 1, :] *= intrinsics[1, 1]
    np_poses_2d[:, 0, :] += intrinsics[0, 2]
    np_poses_2d[:, 1, :] += intrinsics[1, 2]

    out_images = {}
    if shape_orig is not None:
        frames_2d = skel_ours_2d.get_frames()
        for frame_id2 in frames_2d:
            try:
                lin_frame_id = \
                    skel_ours_2d.get_lin_id_for_frame_id(frame_id2)
            except KeyError:
                lin_frame_id = None
            frame_id = skel_ours_2d.mod_frame_id(frame_id=frame_id2)

            im = None
            if frame_id in out_images:
                im = out_images[frame_id]
            elif len(images):
                if frame_id not in images:
                    lg.warning("Not enough images, the video was probably "
                               "cut after LiftingFromTheDeep was run.")
                    continue
                im = copy.deepcopy(images[frame_id])
                im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
            else:
                im = np.zeros((shape_orig[0].astype(int),
                               shape_orig[1].astype(int), 3),
                              dtype='i1')
            if lin_frame_id is not None:
                for jid in range(np_pos_2d_out.shape[2]):
                    if skel_ours_2d.is_visible(frame_id2, jid):
                        p2d = tuple(
                            np_pos_2d_out[lin_frame_id, :, jid]
                            .astype(int).tolist())
                        p2d_det = tuple(
                            np_poses_2d[lin_frame_id, :, jid]
                            .astype(int).tolist())
                        cv2.line(im, p2d, p2d_det,
                                 color=(100, 100, 100), thickness=3)
                        cv2.circle(im, p2d, radius=3,
                                   color=(0, 0, 200), thickness=-1)
                        cv2.circle(im, p2d_det, radius=3,
                                   color=(0, 200, 0), thickness=-1)
            out_images[frame_id] = im
            # cv2.imshow("Out", im)
            # cv2.waitKey(50)

    if False:  # visualize pelvis path
        fig = plt.figure()
        ax = fig.gca(projection='3d')
        for frame_id in range(0, np_pos_3d_out.shape[0], 1):
            j = Joint.PELV
            ax.scatter(np_pos_3d_out[frame_id, 0, j],
                       np_pos_3d_out[frame_id, 2, j],
                       -np_pos_3d_out[frame_id, 1, j],
                       marker='o')
        # smallest = np_pos_3d_out.min()
        # largest = np_pos_3d_out.max()
        ax.set_xlim3d(-5., 5.)
        ax.set_xlabel('x')
        ax.set_ylim3d(-5., 5.)
        ax.set_ylabel('y')
        ax.set_zlim3d(-5., 5.)
        ax.set_zlabel('z')

    if False:  # visualize all joints
        fig = plt.figure()
        ax = fig.gca(projection='3d')
        for frame_id in range(0, np_pos_3d_out.shape[0], 1):
            for j in range(np_pos_3d_out.shape[2]):
                ax.scatter(np_pos_3d_out[frame_id, 0, j],
                           np_pos_3d_out[frame_id, 2, j],
                           -np_pos_3d_out[frame_id, 1, j],
                           marker='o')
        # smallest = np_pos_3d_out.min()
        # largest = np_pos_3d_out.max()
        ax.set_xlim3d(-5., 5.)
        ax.set_xlabel('x')
        ax.set_ylim3d(-5., 5.)
        ax.set_ylabel('y')
        ax.set_zlim3d(-5., 5.)
        ax.set_zlabel('z')
        plt.show()

    assert all(a == b
               for a, b in zip(skel_ours.poses.shape,
                               np_pos_3d_out.shape)), "no"
    skel_ours.poses = np_pos_3d_out
    return skel_ours, out_images, intrinsics
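# The core of optimize_path is a reprojection energy over a y-rotation and a
# 3D translation in normalized camera coordinates. A self-contained,
# single-frame sketch of that energy using scipy.optimize instead of TF1's
# ScipyOptimizerInterface (toy data, not the repo's API):
def _demo_single_frame_reproj_fit():
    import numpy as np
    from scipy.optimize import minimize

    def rot_y(theta):
        c, s = np.cos(theta), np.sin(theta)
        return np.array([[c, 0., -s],
                         [0., 1., 0.],
                         [s, 0., c]])

    def cost(params, pose_local, obs_2d, vis):
        # rotate around y, translate, perspective-divide, mask by visibility
        pos_3d = rot_y(params[0]) @ pose_local + params[1:][:, None]
        pos_2d = pos_3d[:2, :] / pos_3d[2:3, :]
        return np.sum(((pos_2d - obs_2d) * vis) ** 2)

    rng = np.random.RandomState(0)
    pose_local = rng.uniform(-0.5, 0.5, size=(3, 16))  # 16 joints
    gt = np.array([0.3, 0.1, -1., 3.])  # theta_y, tx, ty, tz
    obs_3d = rot_y(gt[0]) @ pose_local + gt[1:][:, None]
    obs_2d = obs_3d[:2, :] / obs_3d[2:3, :]
    res = minimize(cost, x0=np.array([0., 0., 0., 2.5]),
                   args=(pose_local, obs_2d, np.ones((2, 16))))
    print('recovered params:', res.x)  # should approach gt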
def main(argv):
    np.set_printoptions(suppress=True, linewidth=200)
    pjoin = os.path.join

    parser = argparse.ArgumentParser("matcher")
    parser.add_argument("d_scenelets", type=argparse_check_exists,
                        help="Folder containing scenelets")
    parser.add_argument("video", type=argparse_check_exists,
                        help="Input path")
    parser.add_argument("--gap-size-limit", type=int,
                        help="Smallest gap size to still explain")
    args = parser.parse_args(argv)
    d_query = os.path.dirname(args.video)

    # 2d keypoint rescale
    p_im = pjoin(d_query, 'origjpg', 'color_00100.jpg')
    im_ = cv2.imread(p_im)
    shape_orig = im_.shape
    scale_2d = shape_orig[0] / float(INPUT_SIZE)

    query = Scenelet.load(args.video, no_obj=True)
    tr_ground = np.array(query.aux_info['ground'], dtype=np.float32)
    print("tr: %s" % tr_ground)

    name_query = os.path.basename(args.video).split('_')[1]
    query_2d = Scenelet.load(
        pjoin(d_query, "skel_%s_2d_00.json" % name_query)).skeleton

    p_intr = pjoin(d_query, 'intrinsics.json')
    intr = np.array(json.load(open(p_intr, 'r')), dtype=np.float32)
    lg.debug("intr: %s" % intr)

    gaps = find_gaps(query.skeleton, min_pad=1)

    p_scenelets_pickle = pjoin(args.d_scenelets,
                               'match_gap_scenelets.pickle')
    if os.path.exists(p_scenelets_pickle):
        scenelets = pickle_load(open(p_scenelets_pickle, 'rb'))
    else:
        scenelets = read_scenelets(args.d_scenelets, limit=0)
        pickle.dump(scenelets, open(p_scenelets_pickle, 'wb'))

    p_out_sclts = pjoin(d_query, 'fill')
    if os.path.exists(p_out_sclts):
        shutil.rmtree(p_out_sclts)
    os.makedirs(p_out_sclts)

    times = []
    for gap_id, gap in enumerate(gaps):
        lg.debug("gap is %s" % repr(gap))
        if gap[1] - gap[0] < args.gap_size_limit:
            continue
        with Timer("gap %d" % gap_id) as timer:
            chosen = []
            for sc_id, sclt in enumerate(scenelets):
                lg.info("scenelet %d / %d" % (sc_id, len(scenelets)))
                sclt = scenelets[sc_id]
                ground_obj = next(ob for ob in sclt.objects.values()
                                  if ob.label == 'floor')
                ground_part = ground_obj.get_part_by_name('floor')
                lg.debug("floor: %s" % ground_part)
                ground_transform = ground_part.obb.as_transform()
                lg.debug("floor: %s" % ground_transform)
                # sys.exit(0)
                out_sclts = match(query, d_query, query_2d, sclt, intr,
                                  gap, tr_ground, scale=scale_2d)
                if not len(out_sclts):
                    continue
                # pick best from scene
                chosen.append(
                    [out_sclts[0][i] for i in range(len(out_sclts[0]))]
                    + [sc_id])
                # break
            # os.system("rm %s/skel_%s_fill_%03d_%03d__*.json"
            #           % (p_out_sclts, name_query, gap[0], gap[1]))
            chosen = sorted(chosen, key=lambda score_sclt: score_sclt[0])
            for sid, (score, out_sclt, sc_id) in enumerate(chosen):
                p_out = pjoin(p_out_sclts,
                              "skel_%s_fill_%03d_%03d__%02d.json"
                              % (name_query, gap[0], gap[1], sid))
                out_sclt.save(p_out)
                if sid > 5:
                    break
        times.append(timer.get_elapsed_ms())
    lg.info("mean time per gap: %s" % np.mean(times))
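# Hypothetical invocation of the matcher (paths illustrative; the video
# argument is the skel_<name>_unannot.json produced by earlier stages):
#   main(['data/pigraphs_scenelets',
#         'video/lobby15/skel_lobby15_unannot.json',
#         '--gap-size-limit', '4'])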
def evaluate(named_solution, sclt_gt, sclt_gt_2d, frame_ids, path_scene,
             stats, actions=None, scale=100.):
    """Compares one solution to the 3D and 2D ground truth.

    :param named_solution: Solution record with `path` and `name_method`
        attributes.
    :param sclt_gt: Ground-truth 3D scenelet.
    :param sclt_gt_2d: Ground-truth 2D scenelet.
    :param frame_ids: Frame ids to evaluate at.
    :param path_scene: Scene folder containing intrinsics.json.
    :param stats: Statistics aggregator shared between comparisons.
    :param actions: Optional per-frame action labels; 'walking' frames are
        skipped as non-interactive.
    :param scale: Scale from meter to cm.
    :return: OrderedDict of Comparison objects keyed by title, plus the
        missing frames under '_missing'.
    """
    p_intrinsics = os.path.join(path_scene, 'intrinsics.json')
    intrinsics = np.array(json.load(open(p_intrinsics, 'r')),
                          dtype=np.float32)

    print('Loading {}'.format(named_solution.path))
    sclt_sol = Scenelet.load(named_solution.path)
    sclt_sol.skeleton._visibility.clear()
    sclt_sol.skeleton._confidence.clear()
    sclt_sol.skeleton._forwards.clear()
    sclt_sol.skeleton = Skeleton.resample(sclt_sol.skeleton)

    err_3d = Comparison(title='World', path_scene=path_scene,
                        name_method=named_solution.name_method, stats=stats)
    err_3d_local = Comparison(title='Local', path_scene=path_scene,
                              name_method=named_solution.name_method,
                              stats=stats)
    err_2d = Comparison(title='2D', path_scene=path_scene,
                        name_method=named_solution.name_method, stats=stats)
    occlusion = sclt_gt.aux_info['occluded']

    missing = {'method': [], 'gt': []}
    for frame_id in frame_ids:
        try:
            entry = sclt_sol.skeleton.get_pose(frame_id=frame_id)
        except KeyError:
            missing['method'].append(frame_id)
            continue
        if actions is not None and frame_id in actions \
                and actions[frame_id] == 'walking':
            print('Skipping non-interactive frame {} {}'.format(
                frame_id, actions[frame_id]))
            continue

        # 3D
        gt = sclt_gt.skeleton.get_pose(frame_id=frame_id)
        occluded = occlusion['{:d}'.format(frame_id)]
        err_3d.add(entry=entry, gt=gt, frame_id=frame_id, scale=scale,
                   occluded=occluded)

        # Local 3D
        local_entry = entry - entry[:, Joint.PELV:Joint.PELV + 1]
        local_gt = gt - gt[:, Joint.PELV:Joint.PELV + 1]
        err_3d_local.add(entry=local_entry, gt=local_gt,
                         frame_id=frame_id, scale=scale,
                         occluded=occluded)

        #
        # GT 2D
        #
        gt_2d = sclt_gt_2d.skeleton.get_pose(frame_id=frame_id)
        entry_2d = entry[:2, :] / entry[2, :]
        entry_2d[0, :] *= intrinsics[0, 0]
        entry_2d[1, :] *= intrinsics[1, 1]
        entry_2d[0, :] += intrinsics[0, 2]
        entry_2d[1, :] += intrinsics[1, 2]
        err_2d.add(entry=entry_2d, gt=gt_2d[:2, :], frame_id=frame_id,
                   occluded=occluded)

    # stats.paint(path_dest=os.path.join(path_scene, 'debug_eval'))
    mn, mx = np.min(sclt_gt.skeleton.poses, axis=(0, 2)), \
        np.max(sclt_gt.skeleton.poses, axis=(0, 2))
    err_3d.dimensions = (mx - mn) * scale

    assert len(missing['method']) < len(frame_ids) / 2, \
        (missing, frame_ids)

    return OrderedDict({
        err_3d.title: err_3d,
        err_3d_local.title: err_3d_local,
        err_2d.title: err_2d,
        '_missing': missing
    })
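# Comparison is repo-internal; for reference, the standard statistic such an
# accumulator presumably aggregates is the mean per-joint position error
# (MPJPE). A minimal sketch of that metric under the (3, n_joints) pose
# convention used above, in cm when scale=100:
def _demo_mpjpe(entry, gt, scale=100.):
    import numpy as np
    # per-joint Euclidean distances, then the mean over joints
    return float(np.mean(np.linalg.norm(entry - gt, axis=0))) * scale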
def match(query_full, d_query, query_2d_full, scene, intr, gap, tr_ground,
          scale, thresh_log_conf=7.5, w_3d=0.01, fps=3, step_samples=100):
    with_y = False  # optimize for y as well
    np.set_printoptions(suppress=True, linewidth=220)
    pjoin = os.path.join

    len_gap = gap[1] - gap[0] + 1
    query, q_v = get_partial_scenelet(query_full, start=gap[0],
                                      end=gap[1] + 1, fps=1)
    q_v_sum = np.sum(q_v)
    q_v_sum_inv = np.float32(1. / q_v_sum)
    # lg.debug("q_v_sum: %s/%s" % (q_v_sum, q_v.size))
    # scene_min_y = scene.skeleton.get_min_y(tr_ground)
    # lg.debug("scene_min_y: %s" % repr(scene_min_y))

    mid_frames = range(len_gap * fps,
                       scene.skeleton.poses.shape[0] - len_gap * fps,
                       step_samples)
    if not len(mid_frames):
        return []

    scenelets, sc_v = (np.array(e) for e in zip(*[
        get_partial_scenelet(scene, mid_frame_id=mid_frame_id,
                             n_frames=len_gap, fps=fps)
        for mid_frame_id in mid_frames
    ]))
    # for i, (scenelet, sc_v_) in enumerate(zip(scenelets, sc_v)):
    #     mn = np.min(scenelet[sc_v_.astype('b1'), 1, :])
    #     scenelets[i, :, 1, :] -= mn
    #     mn = np.min(scenelets[i, sc_v_.astype('b1'), 1, :])
    # scenelets = np.array(scenelets, dtype=np.float32)
    # sc_v = np.array(sc_v, dtype=np.int32)
    # print("sc_v: %s" % sc_v)
    # print("q_v: %s" % q_v)

    lg.debug("have %d/%d 3D poses in scenelet, and %d/%d in query"
             % (np.sum(sc_v), sc_v.shape[0], np.sum(q_v), q_v.shape[0]))

    query_2d = np.zeros((len_gap, 2, 16), dtype=np.float32)
    conf_2d = np.zeros((len_gap, 1, 16), dtype=np.float32)
    for lin_id, frame_id in enumerate(range(gap[0], gap[1] + 1)):
        if query_2d_full.has_pose(frame_id):
            query_2d[lin_id, :, :] = \
                query_2d_full.get_pose(frame_id)[:2, :]
        # else:
        #     lg.warning("Query2d_full does not have pose at %d?"
        #                % frame_id)

        # im = im_.copy()
        if query_2d_full.has_confidence(frame_id):
            # print("showing %s" % frame_id)
            for joint, conf in query_2d_full._confidence[frame_id].items():
                log_conf = abs(np.log(conf)) if conf >= 0. else 0.
                # print("conf: %g, log_conf: %g" % (conf, log_conf))
                # if log_conf <= thresh_log_conf:
                #     p2d = scale * query_2d_full.get_joint_3d(
                #         joint, frame_id=frame_id)
                #     p2d = (int(round(p2d[0])), int(round(p2d[1])))
                #     cv2.circle(im, center=p2d, radius=int(round(3)),
                #                color=(1., 1., 1., 0.5), thickness=1)
                conf_2d[lin_id, 0, joint] = max(
                    0., (thresh_log_conf - log_conf) / thresh_log_conf)
            # cv2.imshow('im', im)
            # cv2.waitKey(100)
    # while cv2.waitKey() != 27: pass
    conf_2d /= np.max(conf_2d)

    # scale from Denis' scale to current image size
    query_2d *= scale
    # move to normalized camera coordinates
    query_2d -= intr[:2, 2:3]
    query_2d[:, 0, :] /= intr[0, 0]
    query_2d[:, 1, :] /= intr[1, 1]

    #
    # initialize translation
    #

    # centroid of query poses
    c3d = np.mean(query[q_v.astype('b1'), :, :], axis=(0, 2))
    # estimate scenelet centroids
    sclt_means = np.array([
        np.mean(scenelets[i, sc_v[i, ...].astype('b1'), ...], axis=(0, 2))
        for i in range(scenelets.shape[0])
    ], dtype=np.float32)
    # don't change height
    sclt_means[:, 1] = 0
    scenelets -= sclt_means[:, None, :, None]
    lg.debug("means: %s" % repr(sclt_means.shape))
    if with_y:
        np_translation = np.array(
            [c3d for i in range(scenelets.shape[0])], dtype=np.float32)
    else:
        np_translation = np.array(
            [c3d[[0, 2]] for i in range(scenelets.shape[0])],
            dtype=np.float32)
    np_rotation = np.array(
        [np.pi * (i % 2) for i in range(scenelets.shape[0])],
        dtype=np.float32)[:, None]
    n_cands = np_translation.shape[0]

    graph = tf.Graph()
    with graph.as_default(), tf.device('/gpu:0'):
        # 3D translation
        translation_ = tf.Variable(initial_value=np_translation,
                                   name='translation', dtype=tf.float32)
        t_y = tf.fill(dims=(n_cands,),
                      value=(tr_ground[1, 3]).astype(np.float32))
        # t_y = tf.fill(dims=(n_cands,), value=np.float32(0.))
        lg.debug("t_y: %s" % t_y)
        if with_y:
            translation = translation_
        else:
            translation = tf.concat(
                (translation_[:, 0:1], t_y[:, None],
                 translation_[:, 1:2]), axis=1)
        lg.debug("translation: %s" % translation)
        # 3D rotation (Euler XYZ)
        rotation = tf.Variable(np_rotation, name='rotation',
                               dtype=tf.float32)
        # lg.debug("rotation: %s" % rotation)
        w = tf.Variable(conf_2d, trainable=False, name='w',
                        dtype=tf.float32)
        pos_3d_in = tf.Variable(query, trainable=False, name='pos_3d_in',
                                dtype=tf.float32)
        # pos_3d_in = tf.constant(query, name='pos_3d_in',
        #                         dtype=tf.float32)
        pos_2d_in = tf.Variable(query_2d, trainable=False,
                                name='pos_2d_in', dtype=tf.float32)
        # pos_2d_in = tf.constant(query_2d, name='pos_2d_in',
        #                         dtype=tf.float32)
        pos_3d_sclt = tf.Variable(scenelets, trainable=False,
                                  name='pos_3d_sclt', dtype=tf.float32)
        # print("pos_3d_sclt: %s" % pos_3d_sclt)

        # rotation around y
        my_zeros = tf.zeros((n_cands, 1), dtype=tf.float32,
                            name='my_zeros')
        # tf.add_to_collection('to_init', my_zeros)
        my_ones = tf.ones((n_cands, 1))
        # tf.add_to_collection('to_init', my_ones)
        c = tf.cos(rotation, 'cos')
        # tf.add_to_collection('to_init', c)
        s = tf.sin(rotation, 'sin')
        # t0 = tf.concat([c, my_zeros, -s], axis=1)
        # t1 = tf.concat([my_zeros, my_ones, my_zeros], axis=1)
        # t2 = tf.concat([s, my_zeros, c], axis=1)
        # transform = tf.stack([t0, t1, t2], axis=2, name="transform")
        # print("t: %s" % transform)
        transform = tf.concat(
            [c, my_zeros, -s,
             my_zeros, my_ones, my_zeros,
             s, my_zeros, c], axis=1)
        transform = tf.reshape(transform, (-1, 3, 3), name='transform')
        print("t2: %s" % transform)
        # lg.debug("transform: %s" % transform)

        # transform to 3d
        # pos_3d = tf.matmul(transform, pos_3d_sclt) \
        #     + tf.tile(tf.expand_dims(translation, 2),
        #               [1, 1, int(pos_3d_in.shape[2])])
        # pos_3d = tf.einsum("bjk,bcjd->bcjd", transform, pos_3d_sclt)
        shp = pos_3d_sclt.get_shape().as_list()
        transform_tiled = tf.tile(transform[:, None, :, :, None],
                                  (1, shp[1], 1, 1, shp[3]))
        # print("transform_tiled: %s" % transform_tiled)
        pos_3d = tf.einsum("abijd,abjd->abid", transform_tiled,
                           pos_3d_sclt)
        # print("pos_3d: %s" % pos_3d)
        pos_3d += translation[:, None, :, None]
        # pos_3d = pos_3d_sclt
        # print("pos_3d: %s" % pos_3d)

        # perspective divide
        # pos_2d = tf.divide(
        #     tf.slice(pos_3d, [0, 0, 0], [n_cands, 2, -1]),
        #     tf.slice(pos_3d, [0, 2, 0], [n_cands, 1, -1]))
        pos_2d = tf.divide(pos_3d[:, :, :2, :], pos_3d[:, :, 2:3, :])
        # print("pos_2d: %s" % pos_2d)

        diff = pos_2d - pos_2d_in
        # mask loss by 2d key-point visibility
        # print("w: %s" % w)
        # w_sum = tf.reduce_sum()
        masked = tf.multiply(diff, w)
        # print(masked)
        # loss_reproj = tf.nn.l2_loss(masked)
        # loss_reproj = tf.reduce_sum(tf.square(masked[:, :, 0, :])
        #                             + tf.square(masked[:, :, 1, :]),
        #                             axis=[1, 2])
        masked_sqr = tf.square(masked[:, :, 0, :]) \
            + tf.square(masked[:, :, 1, :])
        loss_reproj = tf.reduce_sum(masked_sqr, axis=[1, 2])
        # lg.debug("loss_reproj: %s" % loss_reproj)

        # distance from existing 3D skeletons
        d_3d = q_v_sum_inv * tf.multiply(pos_3d - query[None, ...],
                                         q_v[None, :, None, None],
                                         name='diff_3d')
        # print(d_3d)
        loss_3d = w_3d * tf.reduce_sum(
            tf.square(d_3d[:, :, 0, :]) + tf.square(d_3d[:, :, 1, :])
            + tf.square(d_3d[:, :, 2, :]),
            axis=[1, 2], name='loss_3d_each')
        # print(loss_3d)

        loss = tf.reduce_sum(loss_reproj) + tf.reduce_sum(loss_3d)

        # optimize
        optimizer = ScipyOptimizerInterface(
            loss, var_list=[translation_, rotation],
            options={'gtol': 1e-12})

    with Timer('solve', verbose=True) as t:
        with tf.Session(graph=graph) as session:
            session.run(tf.global_variables_initializer())
            optimizer.minimize(session)
            o_pos_3d, o_pos_2d, o_masked, o_t, o_r, o_w, o_d_3d, \
                o_loss_reproj, o_loss_3d, o_transform, o_translation = \
                session.run([pos_3d, pos_2d, masked, translation,
                             rotation, w, d_3d, loss_reproj, loss_3d,
                             transform, translation])
            o_masked_sqr = session.run(masked_sqr)
    # o_t, o_r = session.run([translation, rotation])
    # print("pos_3d: %s" % o_pos_3d)
    # print("pos_2d: %s" % o_pos_2d)
    # print("o_loss_reproj: %s, o_loss_3d: %s"
    #       % (o_loss_reproj, o_loss_3d))
    # print("t: %s" % o_t)
    # print("r: %s" % o_r)

    chosen = sorted((i for i in range(o_loss_reproj.shape[0])),
                    key=lambda i2: o_loss_reproj[i2] + o_loss_3d[i2])
    lg.info("Best candidate is %d with error %g + %g"
            % (chosen[0], o_loss_reproj[chosen[0]],
               o_loss_3d[chosen[0]]))
    # print("masked: %s" % o_masked)
    # (commented-out reprojection sanity check kept for reference)
    # opp = np.zeros_like(o_pos_3d)
    # for i in range(o_pos_3d.shape[0]):
    #     for j in range(o_pos_3d.shape[1]):
    #         for k in range(16):
    #             opp[i, j, :2, k] = o_pos_3d[i, j, :2, k] \
    #                 / o_pos_3d[i, j, 2:3, k]
    #             # opp[i, j, 0, k] *= intr[0, 0]
    #             # opp[i, j, 1, k] *= intr[1, 1]
    #             # opp[i, j, :2, k] *= intr[1, 1]
    #             a = o_pos_2d[i, j, :, k]
    #             b = opp[i, j, :2, k]
    #             if not np.allclose(a, b):
    #                 print("diff: %s, %s" % (a, b))

    o_pos_2d[:, :, 0, :] *= intr[0, 0]
    o_pos_2d[:, :, 1, :] *= intr[1, 1]
    o_pos_2d += intr[:2, 2:3]

    # for cand_id in range(o_pos_2d.shape[0]):
    if False:  # debug visualization of the best candidates
        # return
        # print("w: %s" % o_w)
        # print("conf_2d: %s" % conf_2d)
        # lg.debug("query_2d[0, 0, ...]: %s" % query_2d[0, 0, ...])
        query_2d[:, 0, :] *= intr[0, 0]
        query_2d[:, 1, :] *= intr[1, 1]
        # lg.debug("query_2d[0, 0, ...]: %s" % query_2d[0, 0, ...])
        query_2d += intr[:2, 2:3]
        # lg.debug("query_2d[0, 0, ...]: %s" % query_2d[0, 0, ...])

        ims = {}
        for cand_id in chosen[:5]:
            lg.debug("starting %s" % cand_id)
            pos_ = o_pos_2d[cand_id, ...]
            for lin_id in range(pos_.shape[0]):
                frame_id = gap[0] + lin_id
                try:
                    im = ims[frame_id].copy()
                except KeyError:
                    p_im = pjoin(d_query, 'origjpg',
                                 "color_%05d.jpg" % frame_id)
                    ims[frame_id] = cv2.imread(p_im)
                    im = ims[frame_id].copy()
                # im = im_.copy()
                for jid in range(pos_.shape[-1]):
                    xy2 = int(round(query_2d[lin_id, 0, jid])), \
                        int(round(query_2d[lin_id, 1, jid]))
                    # print("printing %s" % repr(xy))
                    cv2.circle(im, center=xy2, radius=5,
                               color=(10., 200., 10.), thickness=-1)
                    if o_masked[cand_id, lin_id, 0, jid] > 0 \
                            or o_w[lin_id, 0, jid] > 0:
                        xy = int(round(pos_[lin_id, 0, jid])), \
                            int(round(pos_[lin_id, 1, jid]))
                        # print("printing %s" % repr(xy))
                        cv2.circle(im, center=xy, radius=3,
                                   color=(200., 10., 10.), thickness=-1)
                        cv2.putText(
                            im,
                            "d2d: %g" % o_masked_sqr[cand_id, lin_id, jid],
                            org=((xy2[0] - xy[0]) // 2 + xy[0],
                                 (xy2[1] - xy[1]) // 2 + xy[1]),
                            fontFace=1, fontScale=1, color=(0., 0., 0.))
                        cv2.line(im, xy, xy2, color=(0., 0., 0.))
                        d3d = o_d_3d[cand_id, lin_id, :, jid]
                        d3d_norm = np.linalg.norm(d3d)
                        if d3d_norm > 0.:
                            cv2.putText(
                                im, "%g" % d3d_norm,
                                org=((xy2[0] - xy[0]) // 2 + xy[0] + 10,
                                     (xy2[1] - xy[1]) // 2 + xy[1]),
                                fontFace=1, fontScale=1,
                                color=(0., 0., 255.))
                cv2.putText(im, text="%d::%02d" % (cand_id, lin_id),
                            org=(40, 80), fontFace=1, fontScale=2,
                            color=(255., 255., 255.))
                # pos_2d_ = np.matmul(intr, pos_[lin_id, :2, :]
                #                     / pos_[lin_id, 2:3, :])
                # for p2d in pos_2d_
                cv2.imshow('im', im)
                cv2.waitKey()
            break
        while cv2.waitKey() != 27:
            pass

    out_scenelets = []
    for cand_id in chosen[:1]:
        lg.debug("score of %d is %g + %g = %g"
                 % (cand_id, o_loss_reproj[cand_id], o_loss_3d[cand_id],
                    o_loss_reproj[cand_id] + o_loss_3d[cand_id]))
        scenelet = Scenelet()
        rate = query_full.skeleton.get_rate()
        prev_time = None
        for lin_id, frame_id in enumerate(range(gap[0], gap[1] + 1)):
            time_ = query_full.get_time(frame_id)
            if lin_id and rate is None:
                rate = time_ - prev_time
            if time_ == frame_id:
                time_ = prev_time + rate
            scenelet.skeleton.set_pose(frame_id=frame_id,
                                       pose=o_pos_3d[cand_id, lin_id, :, :],
                                       time=time_)
            prev_time = time_
        tr = np.concatenate(
            (np.concatenate((o_transform[cand_id, ...],
                             o_translation[cand_id, None, :].T), axis=1),
             [[0., 0., 0., 1.]]), axis=0)
        tr_m = np.concatenate(
            (np.concatenate((np.identity(3),
                             -sclt_means[cand_id, None, :].T), axis=1),
             [[0., 0., 0., 1.]]), axis=0)
        tr = np.matmul(tr, tr_m)
        for oid, ob in scene.objects.items():
            if ob.label in ('wall', 'floor'):
                continue
            ob2 = copy.deepcopy(ob)
            ob2.apply_transform(tr)
            scenelet.add_object(obj_id=oid, scene_obj=ob2, clone=False)
        scenelet.name_scene = scene.name_scene
        out_scenelets.append((o_loss_reproj[cand_id], scenelet))
    return out_scenelets
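# The candidate-to-scene transform assembled at the end of match() first
# undoes the centroid shift, then applies the optimized rotation and
# translation. A small numpy sketch of that composition (toy values, not
# optimizer output):
def _demo_compose_candidate_transform():
    import numpy as np
    R = np.identity(3)               # optimized 3x3 rotation (o_transform)
    t = np.array([1., 0., 2.])       # optimized translation (o_translation)
    mean = np.array([0.5, 0., 0.5])  # scenelet centroid (sclt_means)
    tr = np.concatenate(
        (np.concatenate((R, t[:, None]), axis=1), [[0., 0., 0., 1.]]),
        axis=0)
    tr_m = np.concatenate(
        (np.concatenate((np.identity(3), -mean[:, None]), axis=1),
         [[0., 0., 0., 1.]]), axis=0)
    tr = np.matmul(tr, tr_m)  # de-mean first, then rotate + translate
    print(tr)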
def main(argv):
    from imapper.pose.opt_consistent import main as opt_consistent
    pjoin = os.path.join

    parser = argparse.ArgumentParser("Fit full video")
    parser.add_argument("video", type=argparse_check_exists,
                        help="Input path")
    parser.add_argument("step_size", type=int,
                        help="Step size in frames.")
    parser.add_argument("window_size", type=int,
                        help="Window size in frames.")
    parser.add_argument('--wp', type=float, help="Projection weight.",
                        default=1.)
    parser.add_argument('--ws', type=float, help="Smoothness weight.",
                        default=.1)
    parser.add_argument('--wo', type=float, help="Occlusion weight.",
                        default=0.1)
    parser.add_argument('--wi', type=float, help="Intersection weight.",
                        default=1.)  # used to be 10.
    parser.add_argument('--gtol', type=float,
                        help="Optimizer gradient tolerance (termination "
                             "criterion).",
                        default=1e-6)
    parser.add_argument('--maxiter', type=int,
                        help="Optimizer max number of iterations.",
                        default=0)
    parser.add_argument('-w-occlusion', action='store_true',
                        help="Estimate occlusion score.")
    parser.add_argument('-no-isec', action='store_true',
                        help="Don't use intersection terms.")
    parser.add_argument('--dest-dir', type=str,
                        help="Name of subdirectory to save output to.",
                        default='opt1')
    parser.add_argument("-s", "--d-scenelets", dest='s',
                        type=argparse_check_exists,
                        help="Folder containing original PiGraphs "
                             "scenelets")
    parser.add_argument('--batch-size', type=int,
                        help="How many scenelets to optimize at once.",
                        default=1500)
    parser.add_argument('--output-n', type=int,
                        help="How many candidates to output per batch and "
                             "overall.",
                        default=200)
    parser.add_argument('--filter-same-scene', action='store_true',
                        help="Hold out same-scene scenelets.")
    args = parser.parse_args(argv)

    # get video parent directory
    d_query = args.video if os.path.isdir(args.video) \
        else os.path.dirname(args.video)

    # save call log to video directory
    with open(pjoin(d_query, 'args_opt_consistent.txt'), 'a') as f_args:
        f_args.write('(python3 ')
        f_args.write(" ".join(sys.argv))
        f_args.write(")\n")

    # parse video path
    name_query = os.path.split(d_query)[-1]
    p_query = pjoin(d_query, "skel_%s_unannot.json" % name_query) \
        if os.path.isdir(args.video) else args.video
    assert p_query.endswith('.json'), "Need a skeleton file"
    print("name_query: %s" % name_query)

    cache_scenes = None
    skipped = []

    # load initial video path (local poses)
    query = Scenelet.load(p_query, no_obj=True)
    frame_ids = query.skeleton.get_frames()
    half_window_size = args.window_size // 2
    for mid_frame_id in range(frame_ids[0] + half_window_size,
                              frame_ids[-1] - half_window_size + 1,
                              args.step_size):
        gap = (mid_frame_id - half_window_size,
               mid_frame_id + half_window_size)
        assert gap[0] >= frame_ids[0]
        assert gap[1] <= frame_ids[-1]
        pose_count = sum(1 for _frame_id in range(gap[0], gap[1] + 1)
                         if query.skeleton.has_pose(_frame_id))
        if pose_count < 9:
            print("Skipping span because not enough poses: %s"
                  % pose_count)
            skipped.append((gap, pose_count))
            # NOTE: without this continue the span would be processed
            # despite the "Skipping" message above.
            continue

        same_actor = query.skeleton.n_actors == 1  # type: bool
        if not same_actor:
            same_actor = query.skeleton.get_actor_id(frame_id=gap[0]) \
                == query.skeleton.get_actor_id(frame_id=gap[1])
        if not same_actor:
            print('skipping gap {:d}...{:d}, not same actor'.format(
                gap[0], gap[1]))
            continue

        lg.info("gap: %s" % repr(gap))
        argv = [
            '-silent',
            '--wp', "%g" % args.wp,
            '--ws', "%g" % args.ws,
            '--wo', "%g" % args.wo,
            '--wi', "%g" % args.wi,
            '--nomocap',  # added 16/4/2018
            '-v', args.video,
            '--output-n', "%d" % args.output_n
        ]
        if args.w_occlusion:
            argv.extend(['-w-occlusion'])
        if args.no_isec:
            argv.extend(['-no-isec'])
        if args.filter_same_scene:
            argv.extend(['--filter-scenes',
                         name_query.partition('_')[0]])
        # else:
        #     assert False, "crossvalidation assumed"
        if args.maxiter:
            argv.extend(['--maxiter', "%d" % args.maxiter])
        argv.extend([
            'independent',
            '-s', args.s,
            '--gap', "%d" % gap[0], "%d" % gap[1],
            '--dest-dir', args.dest_dir,
            '-tc', '-0.1',
            '--batch-size', "%d" % args.batch_size
        ])
        lg.info("argv: %s" % argv)

        # if 'once' not in locals():
        try:
            _cache_scenes = opt_consistent(argv, cache_scenes)
            if isinstance(_cache_scenes, list) and len(_cache_scenes) \
                    and (cache_scenes is None
                         or len(_cache_scenes) != len(cache_scenes)):
                cache_scenes = _cache_scenes
        except FileNotFoundError as e:
            lg.error("e: %s" % e)
            if e.__str__().endswith('_2d_00.json\''):
                from imapper.pose.main_denis import main as opt0
                argv_opt0 = ['s8', '-d', "%s/denis" % d_query,
                             '-smooth', '0.005']
                opt0(argv_opt0)
            else:
                print(e.__str__())
            opt_consistent(argv)
        # once = True

    show_folder([args.video])
    extract_gaps([args.video, args.s])
    print("skipped: %s" % skipped)
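# Hypothetical command line (paths illustrative): step over the video in
# 10-frame hops with a 20-frame window:
#   python3 fit_full_video.py video/lobby15 10 20 -s data/pigraphs_scenelets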
def main(argv=None):
    np.set_printoptions(suppress=True)
    parser = argparse.ArgumentParser()
    parser.add_argument('d', help="Folder of scene")
    parser.add_argument('-resolution', type=float,
                        help="Target resolution for occupancy map",
                        default=0.1)
    parser.add_argument(
        '-thresh-area', type=float,
        help="Ratio of occupancy map cell area that has to be occupied "
             "for it to count as occupied",
        default=0.1)
    parser.add_argument('-postfix', type=str,
                        help="Scene postfix for augmentation",
                        default="")
    args = parser.parse_args(argv if argv is not None else sys.argv)
    res_target = args.resolution
    if args.postfix and len(args.postfix) \
            and not args.postfix.startswith('_'):
        args.postfix = "_%s" % args.postfix

    path_parent, name_input = os.path.split(os.path.abspath(args.d))
    lg.warning("name input: %s" % name_input)
    path_for_tf = os.path.abspath(
        os.path.join(path_parent, os.pardir, 'dataset'))
    # if 'video' not in path_parent \
    #     else os.path.join(path_parent, 'dataset')
    if not os.path.exists(path_for_tf):
        os.makedirs(path_for_tf, mode=0o0775)

    lg.debug("Loading scenelet...")
    path_scenelet = os.path.join(args.d, "skel_%s.json" % name_input)
    scenelet = Scenelet.load(path_scenelet)
    lg.debug("Scenelet: %s" % scenelet)

    path_state_pickle = os.path.join(args.d,
                                     "state%s.pickle" % args.postfix)
    if not os.path.exists(path_state_pickle):
        lg.error("Does not exist: %s" % path_state_pickle)
        return False
    # assert os.path.exists(path_state_pickle), \
    #     "Does not exist: %s" % path_state_pickle
    lg.debug("Loading volume...")
    state = pickle_load(open(path_state_pickle, 'rb'))
    lg.debug("Loaded volume...")

    lg.debug("Creating scene from scenelet")
    if not no_vis:
        vis = Visualizer(win_size=(1024, 1024))
        vis.add_coords()
    else:
        vis = None
    # scene = Scene(scenelet.name_scenelet)
    # colors = {0: (200., 0., 0.), 1: (0., 200., 0.), 2: (0., 0., 200.)}
    # unit_x = np.array((1., 0., 0.))

    occup = State(room=state.room, tr_ground_inv=None,
                  res_theta=state.resolution[3],
                  resolution=[res_target, res_target, res_target])
    occup.get_volume(labels_to_lin_ids_arg=state.get_labels_to_lin_ids())
    occup_angle = np.ones(shape=(len(occup.volume),
                                 occup.volume[0].shape[0],
                                 occup.volume[0].shape[1],
                                 1),
                          dtype=np.float32) * -1.
    assert np.min(occup_angle) < 0. and np.max(occup_angle) < 0., \
        "Not empty"
    grid_polys = get_grid_shapely(occup=occup, res_orig=state.resolution)
    occup.volume.flags.writeable = True
    volume_occp = occup.volume
    angles = sorted(state.get_angles())
    labels_to_lin_ids = occup.get_labels_to_lin_ids()
    had_vtk_problem = no_vis

    plt.figure()
    rects = []
    for oid, ob in scenelet.objects.items():
        assert oid >= 0, "Need positive here"
        label = ob.label
        if label in TRANSLATIONS_CATEGORIES:
            label = TRANSLATIONS_CATEGORIES[label]
        if label not in labels_to_lin_ids:
            continue
        try:
            poly = get_poly([part.obb for part in ob.parts.values()])
        except ValueError as e:
            print("\n===========\n\nShapely error: %s for %s\n\n"
                  % (e, (label, oid, ob)))
            with open('error.log', 'a') as f:
                f.write("[%s] %d, %s, %s\n" % (args.d, oid, label, ob))
            continue
        ob_angle = ob.get_angle(positive_only=True)
        assert 0. <= ob_angle <= 2 * np.pi, "No: %g" % ob_angle
        rect = get_rectangle(poly, ob_angle)
        rect.extend([oid, CATEGORIES[label]])
        rects.append(rect)
        cat_id = labels_to_lin_ids[label]  # cat_id in volume, not categories
        for gp in grid_polys:
            # skip, if not occupied enough
            if gp.poly.intersection(poly).area / gp.area \
                    < args.thresh_area:
                continue
            # save occupancy
            gp.occupancy = 1.

            id_angle_lower = None
            id_angle_upper = None
            if ob_angle > angles[-1]:
                id_angle_lower = len(angles) - 1
                id_angle_upper = 0
            else:
                for id_angle, angle in enumerate(angles):
                    if ob_angle < angle:
                        id_angle_upper = id_angle
                        id_angle_lower = id_angle - 1
                        break
            assert id_angle_lower is not None \
                and id_angle_upper is not None, "Wrong?"
            assert id_angle_upper != id_angle_lower, \
                "? %s %s" % (id_angle_lower, id_angle_upper)

            # cache
            xy = gp.xy

            # zero means empty in occupancy,
            # so object ids are shifted with 1
            # we need object ids to filter "untouched" objects
            # in tfrecords_create
            if volume_occp[cat_id, xy[0], xy[1], id_angle_lower] == 0 \
                    or label in CATEGORIES_DOMINANT:
                volume_occp[cat_id, xy[0], xy[1], id_angle_lower] = oid + 1
            if volume_occp[cat_id, xy[0], xy[1], id_angle_upper] == 0 \
                    or label in CATEGORIES_DOMINANT:
                volume_occp[cat_id, xy[0], xy[1], id_angle_upper] = oid + 1

            # angles are right now not per-category, but per-scene
            # hence, an object can only overwrite, if it's usually
            # "above" other objects, e.g. a table
            # this is a hack for a z-test
            if occup_angle[cat_id, xy[0], xy[1], 0] < 0. \
                    or label in CATEGORIES_DOMINANT:
                occup_angle[cat_id, xy[0], xy[1], 0] = ob_angle

        if not had_vtk_problem:
            color = COLORS_CATEGORIES[label] \
                if label in COLORS_CATEGORIES else (200., 200., 200.)
            try:
                for id_part, part in ob.parts.items():
                    vis.add_mesh(MeshOBJ.from_obb(part.obb),
                                 name="ob_%02d_part_%02d"
                                      % (oid, id_part),
                                 color=color)
            except AttributeError:
                print("VTK problem...")
                had_vtk_problem = True
    # plt.savefig()
    plt.close()

    if not had_vtk_problem:
        vis.set_camera_pos(pos=(0., -1., 0.))
        vis.camera().SetFocalPoint(0., 0., 0.)
        vis.camera().SetViewUp(-1., 0., 0.)
        vis.set_camera_type(is_ortho=True)
        vis.camera().SetParallelScale(3.)
        # vis.show()

    name_recording = "%s_%s" % (os.path.basename(args.d), args.postfix) \
        if args.postfix else os.path.basename(args.d)
    lg.info("name_recording: %s" % name_recording)

    path_out_occp = os.path.join(os.path.dirname(args.d), os.pardir,
                                 'occupancy', name_recording)
    if not os.path.exists(path_out_occp):
        os.makedirs(path_out_occp)

    # prepare www storage
    www_grid = {'evidence': {}, 'occ': {}}

    # normalize evidence maps
    vmax = 0.
    ims = {}
    for cat, cat_id in labels_to_lin_ids.items():
        ims[cat] = np.squeeze(
            np.sum(state.volume[cat_id, :, :, :], axis=2, keepdims=True))
        vmax = max(vmax, np.max(ims[cat]))

    # gather joined occupancy map
    im_sum = None

    # for each evidence category
    for cat, cat_id in labels_to_lin_ids.items():
        im = ims[cat] / vmax * 255.
        path_out_im = os.path.join(path_out_occp, "e_%s.jpg" % cat)
        cv2.imwrite(path_out_im, im)
        # lg.debug("wrote to %s" % path_out_im)
        www_grid['evidence'][cat] = path_out_im

        im = np.squeeze(volume_occp[cat_id, :, :, 0])
        path_out_im = os.path.join(path_out_occp, "o_%s.jpg" % cat)
        cv2.imwrite(path_out_im, im * 255.)
        # lg.debug("wrote to %s" % path_out_im)
        www_grid['occ'][cat] = path_out_im

        if im_sum is None:
            im_sum = im.copy()
        else:
            im_sum = np.maximum(im, im_sum)

    #
    # save dataset
    #

    name_input_old = name_input
    if args.postfix is not None and len(args.postfix):
        name_input = "%s_%s" % (name_input, args.postfix)

    # state
    path_state_dest = os.path.join(path_for_tf,
                                   "state_%s.pickle" % name_input)
    shutil.copyfile(path_state_pickle, path_state_dest)
    lg.info("Copied\n\t%s to\n\t%s"
            % (path_state_pickle, path_state_dest))

    # occupancy
    path_occup_dest = os.path.join(path_for_tf,
                                   "occup_%s.pickle" % name_input)
    pickle.dump(occup, open(path_occup_dest, 'wb'), -1)
    lg.info("Wrote to %s" % path_occup_dest)

    # occupancy_angle
    path_occup_angle_dest = os.path.join(path_for_tf,
                                         "angle_%s.npy" % name_input)
    min_angle = np.min(occup_angle)
    assert min_angle < 0., "No empty cells??"
    lg.debug("min angle is %s" % min_angle)
    np.save(open(path_occup_angle_dest, 'wb'), occup_angle)
    lg.info("Wrote to %s" % path_occup_angle_dest)

    # skeleton
    path_copied = shutil.copy2(path_scenelet, path_for_tf)
    lg.info("Copied\n\t%s to \n\t%s" % (path_scenelet, path_copied))

    # charness skeleton
    name_skeleton_charness = "skel_%s-charness.json" % name_input_old
    path_scenelet_charness = os.path.join(args.d, name_skeleton_charness)
    assert os.path.exists(path_scenelet_charness), \
        "Does not exist: %s" % path_scenelet_charness
    shutil.copy2(path_scenelet_charness, path_for_tf)
    assert os.path.exists(os.path.join(path_for_tf,
                                       name_skeleton_charness)), \
        "Does not exist: %s" % os.path.join(path_for_tf,
                                            name_skeleton_charness)

    # rectangles
    name_rectangles = "rectangles_%s.npy" % name_input_old
    path_rectangles = os.path.join(path_for_tf, name_rectangles)
    np.save(open(path_rectangles, 'wb'), rects)

    #
    # visualize
    #

    path_out_im = os.path.join(path_out_occp, '3d.png')
    if not had_vtk_problem:
        vis.save_png(path_out_im)
    www_grid['3d'] = path_out_im

    path_out_im = os.path.join(path_out_occp, 'o_sum.png')
    max_im_sum = np.max(im_sum)
    if max_im_sum > 0.:
        cv2.imwrite(path_out_im, im_sum / max_im_sum * 255.)
    else:
        cv2.imwrite(path_out_im, im_sum * 255.)
    www_grid['o_sum'] = path_out_im

    path_www = os.path.join(path_out_occp, os.pardir)
    with open(os.path.join(path_www, 'index.html'), 'a') as f:
        f.write("<style> img {image-rendering: pixelated; } </style>\n")
        f.write("<script>\n")
        f.write("</script>\n")
        f.write("<h3>%s</h3>" % os.path.basename(args.d))
        f.write('<table>\n')

        f.write("<tr>\n")
        f.write("<th>3d</th>")
        f.write("<th>Occupancy sum</th>")
        for cat in www_grid['evidence']:
            f.write("\t<th>%s</th>\n" % cat)
        f.write("<th></th>\n")  # titles
        f.write("</tr>\n")

        f.write("<tr>\n")
        # 3D
        f.write("\t<td rowspan=\"2\">\n")
        path_im = os.path.relpath(www_grid['3d'], path_www)
        f.write("\t<a href=\"%s\">\n"
                "\t\t<img src=\"%s\" height=\"400\" />\n"
                "\t</a>\n" % (path_im, path_im))
        # Evidence sum
        f.write("\t<td rowspan=\"2\">\n")
        path_im = os.path.relpath(www_grid['o_sum'], path_www)
        f.write("\t<a href=\"%s\">\n"
                "\t\t<img src=\"%s\" height=\"400\" />\n"
                "\t</a>\n" % (path_im, path_im))
        # Evidence
        for cat in www_grid['evidence']:
            f.write("<td style=\"padding-bottom: 2px\">\n")
            path_im = os.path.relpath(www_grid['evidence'][cat], path_www)
            f.write("\t<a href=\"%s\">\n"
                    "\t\t<img src=\"%s\" height=\"200\" />\n"
                    "\t</a>\n" % (path_im, path_im))
            f.write("</td>\n")
        f.write("<td>Evidence</td>\n")
        f.write("\t</td>\n")
        f.write("</tr>\n")

        f.write("<tr>\n")
        for cat in www_grid['occ']:
            f.write("<td>\n")
            path_im = os.path.relpath(www_grid['occ'][cat], path_www)
            f.write("\t<a href=\"%s\">\n"
                    "\t\t<img src=\"%s\" height=\"200\" />\n"
                    "</a>\n" % (path_im, path_im))
            f.write("</td>\n")
        f.write("<td>Occupancy map</td>\n")
        f.write("</tr>")
        f.write('</table>')

    return True
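# The per-cell occupancy test above marks a grid cell occupied when at least
# `thresh_area` of its area intersects an object's footprint polygon. A
# self-contained sketch of that test with shapely (toy footprint; grid
# extent and resolution are made up):
def _demo_rasterize_footprint():
    import numpy as np
    from shapely.geometry import Polygon, box
    footprint = Polygon([(0.35, 0.3), (1.15, 0.3),
                         (1.15, 0.9), (0.35, 0.9)])
    res, thresh_area, extent = 0.1, 0.1, 2.0
    n = int(extent / res)
    occ = np.zeros((n, n), dtype=np.uint8)
    for ix in range(n):
        for iy in range(n):
            cell = box(ix * res, iy * res,
                       (ix + 1) * res, (iy + 1) * res)
            if cell.intersection(footprint).area / cell.area \
                    >= thresh_area:
                occ[ix, iy] = 1
    print('%d cells occupied' % occ.sum())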
def show_folder(argv): # python3 stealth/pose/fit_full_video.py --show /home/amonszpa/workspace/stealth/data/video_recordings/scenelets/lobby15 opt1 # python3 stealth/pose/visualization/show_charness_scores.py --show /media/data/amonszpa/stealth/shared/video_recordings/library1 -o opt1 pjoin = os.path.join parser = argparse.ArgumentParser("Fit full video") parser.add_argument('--show', action='store_true') parser.add_argument("video", type=argparse_check_exists, help="Input path") parser.add_argument( '-o', '--opt-folder', help="Which optimization output to process. Default: opt1", default='opt1') parser.add_argument("--window-size", type=int, help="Window size in frames.", default=20) args = parser.parse_args(argv) d = os.path.join(args.video, args.opt_folder) assert os.path.exists(d), "does not exist: %s" % d # parse video path if args.video.endswith(os.sep): args.video = args.video[:-1] name_query = os.path.split(args.video)[-1] print("split: %s" % repr(os.path.split(args.video))) p_query = pjoin(args.video, "skel_%s_unannot.json" % name_query) \ if os.path.isdir(args.video) else args.video assert p_query.endswith('.json'), "Need a skeleton file" # load initial video path (local poses) query = Scenelet.load(p_query, no_obj=True) frame_ids = query.skeleton.get_frames() centroids = Skeleton.get_resampled_centroids(start=frame_ids[0], end=frame_ids[-1], old_frame_ids=frame_ids, poses=query.skeleton.poses) depths_times_charnesses = [] skeleton = Skeleton() depths = [] skeleton.charness_poses = {} # this is in Scenelet incorrectly... skeleton.score_fit = {} # inventing this now skeleton.score_reproj = {} # inventing this now for p in sorted(os.listdir(d)): d_time = pjoin(d, p) if not os.path.isdir(d_time): continue p_skel = next( f for f in os.listdir(d_time) if os.path.isfile(pjoin(d_time, f)) and f.startswith('skel') and f.endswith('json') and '_00' in f) sclt = Scenelet.load(pjoin(d_time, p_skel)) mn, mx = sclt.skeleton.get_frames_min_max() frame_id = mn + (mx - mn) // 2 if query.skeleton.has_pose(frame_id): pos_3d = query.skeleton.get_centroid_3d(frame_id) else: lin_id = frame_id - frame_ids[0] pos_3d = centroids[lin_id, :] # put centroid for each joint skeleton.set_pose(frame_id=frame_id, pose=np.tile(pos_3d[:, None], (1, 16))) with open(pjoin(d_time, 'avg_charness.json')) as fch: data = json.load(fch) set_or_max(skeleton.charness_poses, frame_id, data['avg_charness']) # if frame_id in skeleton.charness_poses: # lg.warning("Maxing charness at frame %d" % frame_id) # skeleton.charness_poses[frame_id] = max( # skeleton.charness_poses[frame_id], data['avg_charness']) # else: # skeleton.charness_poses[frame_id] = data['avg_charness'] # fit scores if 'score_fit' in sclt.aux_info: set_or_max(skeleton.score_fit, frame_id, sclt.aux_info['score_fit']) else: set_or_max(skeleton.score_fit, frame_id, 0.) if 'score_reproj' in sclt.aux_info: set_or_max(skeleton.score_reproj, frame_id, sclt.aux_info['score_reproj']) else: set_or_max(skeleton.score_reproj, frame_id, 0.) 
    fig = plt.figure(figsize=(16, 12), dpi=100)
    ax = fig.add_subplot(121, aspect='equal')
    X = []  # skeleton x
    Z = []  # skeleton z (depth)
    C = []  # charness
    F = []  # score_fit
    R = []  # score_reproj
    T = []  # times
    for frame_id in skeleton.get_frames():
        c = skeleton.get_joint_3d(6, frame_id=frame_id)
        X.append(c[0])
        Z.append(c[2])
        C.append(skeleton.charness_poses[frame_id])
        F.append(skeleton.score_fit[frame_id])
        R.append(skeleton.score_reproj[frame_id])
        T.append(frame_id)
    ax.plot(X, Z, 'k--')
    for frame_id in skeleton.get_frames():
        if frame_id % 5:
            continue
        c = skeleton.get_joint_3d(6, frame_id=frame_id)
        ax.annotate("%d" % frame_id, xy=(c[0], c[2]), zorder=5)
    cax = ax.scatter(X, Z, c=C, cmap='jet', zorder=5)
    fig.colorbar(cax)

    # pad the shorter axis so both spans match (aspect='equal')
    z_lim = (min(Z), max(Z))
    z_span = (z_lim[1] - z_lim[0]) / 2.
    x_lim = min(X), max(X)
    x_span = (x_lim[1] - x_lim[0]) / 2.
    pad = .5
    dspan = z_span - x_span
    if dspan > 0:
        ax.set_xlim(x_lim[0] - dspan - pad, x_lim[1] + dspan + pad)
        ax.set_ylim(z_lim[0] - pad, z_lim[1] + pad)
    else:
        ax.set_xlim(x_lim[0] - pad, x_lim[1] + pad)
        ax.set_ylim(z_lim[0] + dspan - pad, z_lim[1] - dspan + pad)
    ax.set_title('Fit score weighted characteristicness\ndisplayed at '
                 'interpolated initial path position')

    ax = fig.add_subplot(122)
    ax.plot(T, C, 'x--', label='max charness')
    charness_threshes = [0.4, 0.35, 0.3]
    mn_thr_charness = min(charness_threshes)
    mx_thr_charness = max(charness_threshes)
    for ct in charness_threshes:
        ax.plot([T[0], T[-1]], [ct, ct], 'r')
        ax.annotate("charness %g" % ct, xy=(T[0], ct + 0.005))
    charness_sorted = sorted(
        [(fid, c) for fid, c in skeleton.charness_poses.items()],
        key=lambda e: e[1])

    to_show = []

    # Fitness
    divisor = 5.
    F_ = -np.log10(F) / divisor
    print(F_)
    ax.plot(T, F_, 'x--', label="-log_10(score) / %.0f" % divisor)
    mx_F_ = np.percentile(F_, 90)
    for i, (t, f) in enumerate(zip(T, F_)):
        if f > mx_F_ or any(C[i] > ct for ct in charness_threshes):
            to_show.append(i)

    windows = []  # [(t_start, t_max, t_end), ...]
    crossings = {}

    # Reprojection
    R_ = -np.log10(R) / divisor
    # ax.plot(T, R_, 'x--', label="-log_10(score reproj) / %.0f" % divisor)
    is_above = [False for _ in charness_threshes]
    mx_above = []
    for i, (t, r) in enumerate(zip(T, R_)):
        # local charness maximum above the smallest threshold
        if (i + 1 < len(C)) and (C[i] > C[i + 1]) \
                and (C[i] > mn_thr_charness):
            mx_above.append((C[i], t))
        for thr_i, thr in enumerate(charness_threshes):
            if (C[i] > thr) != is_above[thr_i] \
                    or (C[i] > mx_thr_charness and not is_above[thr_i]):
                step = 15 * (len(charness_threshes) - thr_i) \
                    if is_above[thr_i] \
                    else -15 * thr_i
                if is_above[thr_i]:
                    if 'down' not in crossings:
                        crossings['down'] = (C[i], t)
                else:
                    if 'up' not in crossings:
                        crossings['up'] = (C[i - 1], t)
                    elif crossings['up'][0] < C[i - 1]:
                        crossings['up'] = (C[i - 1], t)
            if C[i] < mn_thr_charness and is_above[thr_i]:
                try:
                    c_max, t_max = max((e for e in mx_above),
                                       key=lambda e: e[0])
                    ax.annotate("%.3f\n#%d" % (c_max, t_max),
                                xy=(t_max, c_max),
                                xytext=(t_max + step, c_max + 0.1),
                                arrowprops=dict(facecolor='none',
                                                shrink=0.03))
                    mx_above = []
                    windows.append((crossings['up'][1], t_max,
                                    crossings['down'][1]))
                except (KeyError, ValueError):
                    lg.warning("Can't find gap: %s, %s"
                               % (crossings, mx_above))
                crossings = {}
            is_above[thr_i] = C[i] > thr
            break

    for crossing in windows:
        for i, t in enumerate(crossing):
            c = skeleton.charness_poses[t]
            step = -15 + i * 10
            ax.annotate("%.3f\n#%d" % (c, t), xy=(t, c),
                        xytext=(t + step, c - 0.1),
                        arrowprops=dict(facecolor='none', shrink=0.03))

    # labels
    ax.set_title("Scores and charness w.r.t time: max charness: #%d %g"
                 % (charness_sorted[-1][0], charness_sorted[-1][1]))
    ax.set_xlabel('integer time')
    ax.legend(loc='lower right')
    ax.grid(True)
    ax.yaxis.grid(which='both')
    ax.xaxis.set_ticks(np.arange(T[0] - 1, T[-1] + 1, 5))
    ax.set_yticks([])
    ax.set_ylim(0., 1.)
    ax.set_ylabel('higher is better')
    plt.suptitle("%s" % name_query)

    with open(os.path.join(d, 'charness_rank.csv'), 'w') as fout:
        fout.write("frame_id,charness\n")
        for fid_charness in reversed(charness_sorted):
            fout.write("{:d},{:g}\n".format(*fid_charness))
            print(fid_charness)

    p_out = os.path.join(d, 'charnesses.svg')
    plt.savefig(p_out)
    lg.debug("saved to %s" % p_out)
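# `set_or_max` is used in show_folder() but defined elsewhere in the
# package; a minimal sketch of the assumed behavior (insert, or keep the
# larger value when a frame_id is seen twice) would be:
def _set_or_max_sketch(dictionary, frame_id, value):
    """Assumed behavior of set_or_max; not the project's implementation."""
    if frame_id in dictionary:
        dictionary[frame_id] = max(dictionary[frame_id], value)
    else:
        dictionary[frame_id] = value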
def main(argv):
    pjoin = os.path.join

    parser = argparse.ArgumentParser("")
    parser.add_argument('d', type=_check_exists, help="Input directory")
    parser.add_argument('db', type=_check_exists,
                        help="Path to scenelets database")
    parser.add_argument(
        '-o', '--opt-folder',
        help="Which optimization output to process. Default: opt1",
        default='opt1')
    parser.add_argument('-limit', type=int,
                        help="How many scenelets to aggregate.",
                        default=3)
    parser.add_argument('--c-threshold',
                        help='Charness threshold. Default: 0.3',
                        type=float, default=0.3)
    parser.add_argument('--d-threshold',
                        help='Distance threshold. Default: 0.4',
                        type=float, default=0.4)
    args = parser.parse_args(argv)
    path_scenes_root = os.path.normpath(pjoin(args.d, os.pardir))
    print("Working with %s" % args.d)
    d = pjoin(args.d, args.opt_folder)
    name_query = d.split(os.sep)[-2]
    query3d = Scenelet.load(
        os.path.join(args.d, 'skel_{:s}_unannot.json'.format(name_query)))
    n_actors = query3d.skeleton.n_actors  # type: int
    log = LoggedSequence()

    elements = [[], [], []]
    spans = []
    above = []
    for p in sorted(os.listdir(d)):
        d_time = pjoin(d, p)
        if not os.path.isdir(d_time) or 'bak' in p:
            continue
        parts = p.split('_')
        start = int(parts[0])
        end = int(parts[-1])

        sum_charness = 0.
        sum_weight = 0.
        sum_charness_unw = 0.
        sum_weight_unw = 0
        diff_succ, diff = False, 100.
        for f in glob.iglob("%s/skel_%s_*.json" % (d_time, name_query)):
            rank = int(os.path.splitext(f)[0].rpartition('_')[-1])
            if rank >= args.limit:
                continue
            data = json.load(open(f, 'r'))
            charness = data['charness']
            weight = max(0., .1 - data['score_fit'])
            sum_charness += weight * charness
            sum_weight += weight
            sum_charness_unw += charness
            elements[sum_weight_unw].append(charness)
            sum_weight_unw += 1
            if rank == 0:
                diff, diff_succ = get_pose_distance(query3d=query3d,
                                                    path_match=f,
                                                    gap=(start, end))
        if sum_weight > 0.:
            sum_charness /= sum_weight
        if sum_weight_unw > 0.:
            sum_charness_unw /= sum_weight_unw

        frame_id = (start + end) // 2
        actor_id = query3d.skeleton.get_actor_id(frame_id=frame_id) \
            if n_actors > 1 else 0
        spans.append(Span(start, end, sum_charness, sum_charness_unw,
                          actor_id=actor_id))

        # check for pose replacement
        if diff_succ and diff > args.d_threshold:
            above.append(Span2(start=frame_id, end=frame_id, value=diff))
        if diff_succ:
            log.add_point(value=diff, time=frame_id)

    for actor_id in range(n_actors):
        cs, span_ids = zip(*[(span.charness, span_id)
                             for span_id, span in enumerate(spans)
                             if span.actor_id == actor_id])
        cs2 = gaussian_filter(cs, sigma=2.5).tolist()
        for smoothed, span_id in zip(cs2, span_ids):
            spans[span_id].smoothed_charness = smoothed

        plt.figure()
        plt.plot(cs, label="orig")
        plt.plot([span.smoothed_charness for span in spans],
                 label="smoothed")
        plt.legend()
        p_graph = os.path.join(args.d, 'charness2.svg')
        plt.savefig(p_graph)

    log.plot(path_root=args.d)

    spans = sorted(spans, key=lambda s: s.smoothed_charness, reverse=True)
    chosen = [spans[0]]
    for span_id in range(1, len(spans)):
        span = spans[span_id]
        overlap = next((c for c in chosen if span.overlaps(c)), None)
        if overlap is not None:
            continue
        elif span.start == query3d.skeleton.get_frames()[0] \
                or span.end == query3d.skeleton.get_frames()[-1]:
            print("Skipping %s because first/last frame." % span)
            continue
        else:
            chosen.append(span)

    # Spans to replace because of quality, if not replaced already
    spans_r = group_spans(above=above)
    chosen, spans_r_filtered = filter_spans_to_replace(
        spans=spans_r, chosen=chosen, c_threshold=args.c_threshold)
    print('Filtered: {}\nchosen: {}'.format(spans_r_filtered, chosen))

    # keep at least one characteristic pose for each actor
    p_cmd = os.path.join(args.d, 'gap_command_new.sh')
    with open(p_cmd, 'w') as f:
        f.write('BATCH_SIZE=8\n\n')
        for actor_id in range(n_actors):
            f.write('######\n# Actor {:d}\n######\n\n'.format(actor_id))
            for span in chain(chosen, spans_r_filtered):
                if span.actor_id != actor_id:
                    continue
                surr = find_surrounding(span, spans)
                cmd = construct_command(
                    name_query=name_query,
                    span=span,
                    tc=args.c_threshold,
                    surr=surr,
                    path_scenes_root=path_scenes_root,
                    path_scenelets_db=args.db)
                if isinstance(span.charness, float):
                    f.write("# charness: %g\n" % span.charness)
                f.write('{}\n\n'.format(cmd))
            f.write('\n\n')
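# `Span` and `Span2` come from elsewhere in the package; the sketch below
# only captures the interface main() relies on (the attributes plus
# Span.overlaps), and is an assumption, not the real implementation.
class _SpanSketch(object):
    """Assumed shape of Span; illustration only."""
    def __init__(self, start, end, charness, charness_unw, actor_id=0):
        self.start = start
        self.end = end
        self.charness = charness
        self.charness_unw = charness_unw
        self.actor_id = actor_id
        self.smoothed_charness = charness

    def overlaps(self, other):
        # closed-interval overlap test in frame space
        return self.start <= other.end and other.start <= self.end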
def export_scenelet(um, o_pos_3d, o_polys_3d, query_full_skeleton, scenes,
                    joints_active, transform_id=None):
    """Extract a scenelet (poses and objects) from the data of the
    optimized problem.

    Args:
        um (stealth.pose.unk_manager.UnkManager): Data manager.
        o_pos_3d (np.ndarray): Output 3D poses.
        o_polys_3d (np.ndarray): (6K, 4, 3) 3D oriented bounding boxes
            stored stacked.
        query_full_skeleton (stealth.logic.skeleton.Skeleton):
            Initial path containing time information.
        scenes (list): Scenelets indexed by pid2scene.id_scene, used to
            look up the scenelet name and charness.
        joints_active (list): List of joint_ids that were optimized for.
            Usage: pose16[:, joints_active] = o_pos_3d[pid, :, :]
        transform_id (int): Export only a specific group.
            Everything is exported, if None.
    Returns:
        A scenelet extracted from the data provided.
    """
    # cache function
    _guess_time_at = query_full_skeleton.guess_time_at

    # all poses, or only the ones that belong to a group/scenelet
    if transform_id is None:
        pids_sorted = sorted(
            [(pid, pid2scene)
             for pid, pid2scene in um.pids_2_scenes.items()],
            key=lambda e: e[1].frame_id)
    else:
        pids_2_scenes = um.pids_2_scenes
        pids_sorted = sorted(
            [(pid, pids_2_scenes[pid])
             for pid in um.get_pids_for(transform_id)],
            key=lambda e: e[1].frame_id)

    # create output scenelet
    o = Scenelet()
    charness = None

    #
    # Skeleton
    #

    # cache skeleton reference
    skeleton = o.skeleton
    # fill skeleton
    for pid, pid2scene in pids_sorted:
        if charness is None:
            scene = scenes[pid2scene.id_scene]
            charness = scene.charness
            o.add_aux_info('name_scenelet', scene.name_scenelet)
            o.charness = charness
        # get frame_id
        frame_id = int(pid2scene.frame_id)
        # check if already exists
        if skeleton.has_pose(frame_id):
            # TODO: fix overlapping frame_ids
            lg.warning("[export_scenelet] Overwriting output frame_id %d"
                       % frame_id)
        # add with time guessed from input skeleton rate
        pose = np.zeros((3, Joint.get_num_joints()))
        pose[:, joints_active] = o_pos_3d[pid, :, :]
        pose[:, Joint.PELV] = (pose[:, Joint.LHIP]
                               + pose[:, Joint.RHIP]) / 2.
        pose[:, Joint.NECK] = (pose[:, Joint.HEAD]
                               + pose[:, Joint.THRX]) / 2.
        assert not skeleton.has_pose(frame_id=frame_id), \
            'Already has pose: {}'.format(frame_id)
        skeleton.set_pose(frame_id=frame_id, pose=pose,
                          time=_guess_time_at(frame_id))

    #
    # Objects
    #

    scene_obj = None
    scene_obj_oid = 0  # unique identifier that groups parts to objects
    for polys2scene in um.polys2scene.values():
        # check if we are restricted to a certain group
        if transform_id is not None \
                and polys2scene.transform_id != transform_id:
            continue

        start = polys2scene.poly_id_start
        end = start + polys2scene.n_polys
        # 6 x 4 x 3
        polys = o_polys_3d[start:end, ...]
        assert polys.shape[0] == 6, "Assumed cuboids here"
        if scene_obj is None or scene_obj_oid != polys2scene.object_id:
            category = next(cat for cat in CATEGORIES
                            if CATEGORIES[cat] == polys2scene.cat_id)
            scene_obj = SceneObj(label=category)
            scene_obj_oid = polys2scene.object_id
            o.add_object(obj_id=-1, scene_obj=scene_obj, clone=False)
        part = scene_obj.add_part(part_id=-1,
                                  label_or_part=polys2scene.part_label)

        # TODO: average for numerical precision errors
        centroid = np.mean(polys, axis=(0, 1))
        ax0 = polys[0, 1, :] - polys[0, 0, :]
        scale0 = np.linalg.norm(ax0)
        ax0 /= scale0
        ax1 = polys[0, 3, :] - polys[0, 0, :]
        scale1 = np.linalg.norm(ax1)
        ax1 /= scale1
        ax2 = polys[1, 0, :] - polys[0, 0, :]
        scale2 = np.linalg.norm(ax2)
        ax2 /= scale2
        part.obb = Obb(centroid=centroid,
                       axes=np.concatenate(
                           (ax0[:, None], ax1[:, None], ax2[:, None]),
                           axis=1),
                       scales=[scale0, scale1, scale2])

    return o
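# Sanity-check sketch for the OBB extraction above: given the centroid,
# the unit axes and the full edge lengths (`scales`), the eight cuboid
# corners can be recovered as centroid + 0.5 * sum_i(sign_i * scale_i *
# axis_i). Illustration only; `_obb_corners` is not part of the codebase.
def _obb_corners(centroid, axes, scales):
    """Recover the 8 cuboid corners from the Obb fields used above."""
    import itertools
    corners = []
    for signs in itertools.product((-1., 1.), repeat=3):
        offset = sum(sign * scale * axes[:, i] * .5
                     for i, (sign, scale)
                     in enumerate(zip(signs, scales)))
        corners.append(centroid + offset)
    return np.asarray(corners)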
def main(argv):
    conf = Conf.get()

    parser = argparse.ArgumentParser("Denis pose converter")
    parser.add_argument('camera_name', help="Camera name ('G15', 'S6')",
                        type=str)
    parser.add_argument(
        '-d', dest='dir', required=True,
        help="Path to the <scene folder>/denis containing skeletons.json")
    parser.add_argument(
        '-filter', dest='with_filtering', action="store_true",
        help="Should we do post-filtering (1-euro) on the pelvis positions")
    parser.add_argument('-huber', required=False, action='store_true',
                        help="Should we use huber loss?")
    parser.add_argument('-smooth', type=float, default=0.005,
                        help="Should we have a smoothness term (l2/huber)?")
    parser.add_argument(
        '--winsorize-limit', type=float,
        default=conf.optimize_path.winsorize_limit,
        help='Threshold for filtering too large jumps of the 2D centroid')
    parser.add_argument('--no-resample', action='store_true',
                        help="Do not add resampled frames")
    parser.add_argument('--n-actors', type=int, default=1,
                        help="How many skeletons to track "
                             "(max number of people in scene).")
    parser.add_argument('--person_height', type=float,
                        help='Assumed height of human(s) in video.',
                        default=Conf.get().optimize_path.person_height)
    parser.add_argument(
        '--forwards-window-size', type=int,
        help='How many poses in time to look before AND after to '
             'average forward direction. 0 means no averaging. Default: 0.',
        default=0)
    parser.add_argument('--no-img', action='store_true',
                        help='Do not read and write images '
                             '(vis reproj error)')
    parser.add_argument('--postfix', type=str,
                        help="Output file postfix.", default='unannot')
    args = parser.parse_args(argv)
    show = False
    args.resample = not args.no_resample
    assert os.path.exists(args.dir), "Source does not exist: %s" % args.dir
    p_scene = os.path.normpath(os.path.join(args.dir,
                                            os.pardir))  # type: str

    # The video rate (from `avconv -r`) is read from video_params.json.
    p_video_params = os.path.join(p_scene, 'video_params.json')
    assert os.path.exists(p_video_params), "Need video_params.json for rate"
    if 'r' not in args or args.r is None:
        args.r = json.load(open(p_video_params, 'r'))['rate-avconv']

    # manual parameters (depth initialization, number of actors)
    p_scene_params = os.path.join(args.dir, os.pardir, 'scene_params.json')
    if not os.path.exists(p_scene_params):
        scene_params = {
            'depth_init': 10.,
            'actors': args.n_actors,
            'ground_rot': [0., 0., 0.]
        }
        json.dump(scene_params, open(p_scene_params, 'w'))
        raise RuntimeError("Inited scene_params.json, please check: %s"
                           % p_scene_params)
    else:
        scene_params = json.load(open(p_scene_params, 'r'))
        lg.warning("Will work with %d actors and init depth to %g"
                   % (scene_params['actors'], scene_params['depth_init']))
        assert '--n-actors' not in argv \
            or args.n_actors == scene_params['actors'], \
            "Actor count mismatch, remove %d from args, because " \
            "scene_params.json says %d?" \
            % (args.n_actors, scene_params['actors'])
        args.n_actors = scene_params['actors']
    ground_rot = scene_params['ground_rot'] or [0., 0., 0.]
    # load images
    path_images = os.path.abspath(os.path.join(args.dir, os.pardir,
                                               'origjpg'))
    images = {}
    shape_orig = None
    if not args.no_img:
        images, shape_orig = load_images(path_images)

    # pick the skeletons json with the highest index
    path_skeleton = \
        max((f for f in os.listdir(os.path.join(args.dir))
             if f.startswith('skeletons') and f.endswith('json')),
            key=lambda s: int(os.path.splitext(s)[0].split('_')[1]))
    print("path_skeleton: %s" % path_skeleton)
    data = json.load(open(os.path.join(args.dir, path_skeleton), 'r'))

    if False:
        # Dead debug path: multi-actor segmentation via Gurobi.
        # NOTE: `pose_constraints` and `first_run` would come from a
        # disabled cleanup() call.
        p_segm_pickle = os.path.join(args.dir, os.pardir,
                                     "label_skeletons.pickle")
        problem = None
        if False and os.path.exists(p_segm_pickle):
            lg.warning("Loading skeleton segmentation from pickle %s"
                       % p_segm_pickle)
            pose_ids, problem = pickle_load(open(p_segm_pickle, 'rb'))
        if not problem or problem._n_actors != args.n_actors:
            pose_ids, problem, data = more_actors_gurobi(
                data, n_actors=args.n_actors,
                constraints=pose_constraints,
                first_run=first_run)
            if True or show:
                show_multi(images, data, pose_ids, problem,
                           p_dir=os.path.join(args.dir, os.pardir),
                           first_run=first_run,
                           n_actors=args.n_actors)
            pickle.dump((pose_ids, problem),
                        open(p_segm_pickle, 'wb'), -1)
    else:
        pose_ids = greedy_actors(data, n_actors=args.n_actors)
        data = DataPosesWrapper(data=data)

    visible_f = {a: {} for a in range(args.n_actors)}
    visible_f_max = 0.

    if show:
        plt.ion()
        fig = None
        axe = None
        scatters = dict()

    # how many images we have
    min_frame_id = min(f for f in pose_ids)
    frames_mod = max(f for f in pose_ids) - min_frame_id + 1
    skel_ours = Skeleton(frames_mod=frames_mod, n_actors=args.n_actors,
                         min_frame_id=min_frame_id)
    skel_ours_2d = Skeleton(frames_mod=frames_mod, n_actors=args.n_actors,
                            min_frame_id=min_frame_id)

    if isinstance(data, DataPosesWrapper):
        frames = data.get_frames()
    else:
        frames = []
        for frame_str in sorted(data.get_frames()):
            try:
                frame_id = int(frame_str.split('_')[1])
            except ValueError:
                print("skipping key %s" % frame_str)
                continue
            frames.append(frame_id)

    my_visibilities = [[], []]
    for frame_id in frames:
        frame_str = DataPosesWrapper._to_frame_str(frame_id)
        pose_in = data.get_poses_3d(frame_id=frame_id)
        pose_in_2d = data.get_poses_2d(frame_id=frame_id)

        if False and len(pose_in.shape) > 2:
            # dead debug path: visualize all candidate poses
            pose_id = pose_ids[frame_id]
            if not args.no_img:
                im = cv2.cvtColor(images[frame_id], cv2.COLOR_RGB2BGR)
                for i in range(pose_in.shape[0]):
                    c = (1., 0., 0., 1.)
                    if i == pose_id:
                        c = (0., 1., 0., 1.)
                    color = tuple(int(c_ * 255) for c_ in c[:3])
                    for p2d in pose_in_2d[i, :, :]:
                        cv2.circle(im, (p2d[1], p2d[0]), radius=3,
                                   color=color, thickness=-1)
                    center = np.mean(pose_in_2d[i, :, :],
                                     axis=0).round().astype('i4').tolist()
                    cv2.putText(im, "%d" % i, (center[1], center[0]),
                                1, 1, color)
                if show:
                    cv2.imshow("im", im)
                    cv2.waitKey(100)
            pose_in = pose_in[pose_id, :, :]
            pose_in_2d = pose_in_2d[pose_id, :, :]
            visible = visible[pose_id]

        for actor_id in range(args.n_actors):
            # expanded frame_id
            frame_id2 = Skeleton.unmod_frame_id(frame_id=frame_id,
                                                actor_id=actor_id,
                                                frames_mod=frames_mod)
            assert (actor_id != 0) ^ (frame_id2 == frame_id), "no"
            frame_id_mod = skel_ours.mod_frame_id(frame_id=frame_id2)
            assert frame_id_mod == frame_id, \
                "No: %d %d %d" % (frame_id, frame_id2, frame_id_mod)
            actor_id2 = skel_ours.get_actor_id(frame_id2)
            assert actor_id2 == actor_id, \
                "no: %s %s" % (actor_id, actor_id2)

            # which pose explains this actor in this frame
            pose_id = pose_ids[frame_id][actor_id]
            # check, if actor found
            if pose_id < 0:
                continue

            # 3D pose
            pose = pose_in[pose_id, :, JointDenis.revmap].T
            # added by Aron on 4/4/2018 (Denis' pelvis is too high up)
            pose[:, Joint.PELV] = (pose[:, Joint.LHIP]
                                   + pose[:, Joint.RHIP]) / 2.
            skel_ours.set_pose(frame_id2, pose)

            # 2D pose
            pose_2d = pose_in_2d[pose_id, :, :]
            arr = np.array(JointDenis.pose_2d_to_ours(pose_2d),
                           dtype=np.float32).T
            skel_ours_2d.set_pose(frame_id2, arr)

            #
            # visibility (binary) and confidence (float)
            #

            vis_i = data.get_visibilities(frame_id)[pose_id]
            vis_f = data.get_confidences(frame_id)[pose_id]
            for jid, visible in enumerate(vis_i):  # for each joint
                # binary visibility
                jid_ours = JointDenis.to_ours_2d(jid)
                skel_ours_2d.set_visible(frame_id2, jid_ours, visible)

                # confidence (fractional visibility)
                if np.isnan(vis_f[jid]):
                    continue
                try:
                    visible_f[actor_id][frame_id2][jid_ours] = vis_f[jid]
                except KeyError:
                    visible_f[actor_id][frame_id2] = \
                        {jid_ours: vis_f[jid]}
                visible_f_max = max(visible_f_max, vis_f[jid])
                conf_ = get_conf_thresholded(vis_f[jid],
                                             thresh_log_conf=None,
                                             dtype_np=np.float32)
                skel_ours_2d.set_confidence(frame_id=frame_id2,
                                            joint=jid_ours,
                                            confidence=conf_)
                my_visibilities[0].append(vis_f[jid])
                my_visibilities[1].append(conf_)
    skel_ours_2d._confidence_normalized = True

    plt.figure()
    plt.plot(my_visibilities[0], my_visibilities[1], 'o')
    plt.savefig('confidences.pdf')

    assert skel_ours.n_actors == args.n_actors, "no"
    assert skel_ours_2d.n_actors == args.n_actors, "no"

    # align to room
    min_z = np.min(skel_ours.poses[:, 2, :])
    print("min_max: %s, %s" % (min_z, np.max(skel_ours.poses[:, 2, :])))
    skel_ours.poses[:, 2, :] += min_z
    skel_ours.poses /= 1000.
    # The output is scaled to 2 m by Denis. We change this to
    # person_height * height_correction / 2 in order to correct for the
    # skeletons being a bit too high still.
    skel_ours.poses *= \
        args.person_height * conf.optimize_path.height_correction / 2.
    skel_ours.poses[:, 2, :] *= -1.
    skel_ours.poses = skel_ours.poses[:, [0, 2, 1], :]

    # refine
    name_video = args.dir.split(os.sep)[-2]
    out_path = os.path.join(args.dir, os.pardir,
                            "skel_%s_%s.json" % (name_video, args.postfix))
    out_path_orig = os.path.join(args.dir, os.pardir,
                                 "skel_%s_lfd_orig.json" % name_video)
    sclt_orig = Scenelet(skeleton=copy.deepcopy(skel_ours))
    sclt_orig.save(out_path_orig)

    skel_ours_2d_all = copy.deepcopy(skel_ours_2d)
    assert len(skel_ours_2d_all.get_frames()), \
        skel_ours_2d_all.get_frames()

    #
    # Optimize
    #

    skel_ours, skel_ours_2d, intrinsics, frame_ids_filled_in = prepare(
        args.camera_name,
        winsorize_limit=args.winsorize_limit,
        shape_orig=shape_orig,
        path_scene=p_scene,
        skel_ours_2d=skel_ours_2d,
        skel_ours=skel_ours,
        resample=args.resample,
        path_skel=path_skeleton)
    frames_ignore = []
    tr_ground = np.eye(4, dtype=np.float32)
    skel_opt, out_images, K = optimize_path(
        skel_ours,
        skel_ours_2d,
        images,
        intrinsics=intrinsics,
        path_skel=out_path,
        shape_orig=shape_orig,
        use_huber=args.huber,
        weight_smooth=args.smooth,
        frames_ignore=frames_ignore,
        resample=args.resample,
        depth_init=scene_params['depth_init'],
        ground_rot=ground_rot)

    for frame_id in skel_opt.get_frames():
        skel_opt.set_time(frame_id=frame_id,
                          time=float(frame_id) / args.r)

    skel_opt_raw = copy.deepcopy(skel_opt)
    skel_opt_resampled = Skeleton.resample(skel_opt)

    # filter pelvis
    if args.with_filtering:
        out_filter_path = os.path.join(args.dir, os.pardir,
                                       "vis_filtering")
        skel_opt = filter_(
            skel_opt_resampled,
            out_filter_path=out_filter_path,
            skel_orig=skel_opt,
            weight_smooth=args.smooth,
            forwards_window_size=args.forwards_window_size)
    else:
        skel_opt.estimate_forwards(k=args.forwards_window_size)
        skel_opt_resampled.estimate_forwards(
            k=args.forwards_window_size)

    min_y, max_y = skel_opt.get_min_y(tr_ground)
    print("min_y: %s, max_y: %s" % (min_y, max_y))

    #
    # save
    #

    frame_ids_old = set(skel_opt.get_frames())
    if args.resample:
        skel_opt = skel_opt_resampled
        frame_ids_filled_in.update(
            set(skel_opt.get_frames()).difference(frame_ids_old))
        lg.warning("Saving resampled scenelet!")
    scenelet = Scenelet(skel_opt)
    del skel_opt
    tr_ground[1, 3] = min_y
    scenelet.aux_info['ground'] = tr_ground.tolist()
    assert isinstance(ground_rot, list) and len(ground_rot) == 3
    scenelet.add_aux_info('ground_rot', ground_rot)
    scenelet.add_aux_info(
        'path_opt_params', {
            'rate': args.r,
            'w-smooth': args.smooth,
            'winsorize-limit': args.winsorize_limit,
            'camera': args.camera_name,
            'huber': args.huber,
            'height_correction': conf.optimize_path.height_correction,
            'focal_correction': conf.optimize_path.focal_correction
        })
    scenelet.add_aux_info('frame_ids_filled_in',
                          list(frame_ids_filled_in))
    assert scenelet.skeleton.has_forwards(), "No forwards??"
    scenelet.save(out_path)

    if show:
        # save path plot
        out_path_path = os.path.join(args.dir, os.pardir,
                                     "%s_path.jpg" % name_video)
        path_fig = plot_path(scenelet.skeleton)
        legend = ["smooth %g" % args.smooth]
        plt.legend(legend)
        path_fig.savefig(out_path_path)

    # backup args
    path_args = os.path.join(args.dir, os.pardir, 'args_denis.txt')
    with open(path_args, 'a') as f_args:
        f_args.write("%s %s\n" % (os.path.basename(sys.executable),
                                  " ".join(argv)))

    # save 2D detections to file
    if args.postfix == 'unannot':
        path_skel_ours_2d = os.path.join(
            args.dir, os.pardir,
            "skel_%s_2d_%02d.json" % (name_video, 0))
        sclt_2d = Scenelet(skel_ours_2d_all)
        print('Saving {} to {}'.format(
            len(skel_ours_2d_all.get_frames()), path_skel_ours_2d))
        sclt_2d.skeleton.aux_info = {}
        sclt_2d.save(path_skel_ours_2d)
    else:
        print(args.postfix)

    logging.info("Saving images...")
    if len(images) and len(out_images):
        path_out_images = os.path.join(args.dir, os.pardir, 'color')
        try:
            os.makedirs(path_out_images)
        except OSError:
            pass
        frames = list(out_images.keys())
        for frame_id in range(frames[0], frames[-1] + 1):
            im = out_images[frame_id] if frame_id in out_images \
                else cv2.cvtColor(images[frame_id], cv2.COLOR_BGR2RGB)
            for actor_id in range(args.n_actors):
                if frame_id not in visible_f[actor_id]:
                    continue
                frame_id2 = skel_ours_2d_all.unmod_frame_id(
                    frame_id=frame_id, actor_id=actor_id,
                    frames_mod=skel_ours_2d_all.frames_mod)
                for joint, is_vis \
                        in visible_f[actor_id][frame_id].items():
                    p2d = skel_ours_2d_all.get_joint_3d(
                        joint, frame_id=frame_id2)
                    vis_bool = True
                    if skel_ours_2d_all.has_visible(frame_id=frame_id2,
                                                    joint_id=joint):
                        vis_bool &= skel_ours_2d_all.is_visible(
                            frame_id2, joint)
                    radius = abs(np.log(is_vis / 0.1 + 1e-6))
                    if not np.isnan(radius):
                        p2d = (int(round(p2d[0])), int(round(p2d[1])))
                        cv2.circle(im, center=p2d,
                                   radius=int(round(radius)),
                                   color=(1., 1., 1., 0.5),
                                   thickness=1)
                        conf = get_conf_thresholded(
                            conf=is_vis, thresh_log_conf=None,
                            dtype_np=np.float32)
                        if conf > 0.5:
                            cv2.putText(img=im,
                                        text=Joint(joint).get_name(),
                                        org=p2d, fontFace=1, fontScale=1,
                                        color=(10., 150., 10., 100.))
            scale = (shape_orig[1] / float(im.shape[1]),
                     shape_orig[0] / float(im.shape[0]))
            cv2.imwrite(
                os.path.join(path_out_images,
                             "color_%05d.jpg" % frame_id),
                cv2.resize(im, (0, 0), fx=scale[0], fy=scale[1],
                           interpolation=cv2.INTER_CUBIC))
        lg.info("Wrote images to %s/" % path_out_images)
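# `get_conf_thresholded` (imported from elsewhere) maps the raw per-joint
# confidences into [0, 1] before they are stored and drawn above. The
# sketch below is only a plausible stand-in (a sigmoid around a
# log-confidence threshold); both the form and the constant are
# assumptions, NOT the project's implementation.
def _conf_thresholded_sketch(conf, thresh_log_conf=7.5,
                             dtype_np=np.float32):
    """Plausible squashing of a raw confidence into (0, 1)."""
    log_conf = np.log(np.maximum(conf, 1e-10))
    return dtype_np(1. / (1. + np.exp(-(log_conf + thresh_log_conf))))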
def prepare(camera_name, winsorize_limit, shape_orig, path_scene,
            skel_ours_2d, skel_ours, resample, path_skel):
    """
    Args:
        camera_name (str): Name of camera for intrinsics calculation.
        winsorize_limit (float): Outlier detection threshold.
        shape_orig (Tuple[int, int]): Original video resolution.
        path_scene (str): Root path to scene.
        skel_ours_2d (Skeleton): (N, 2, 16) 2D skeletons from LFD
            in our format.
        skel_ours (Skeleton): (N, 3, 16) Local space 3D skeletons in
            iMapper coordinate frame (y-down, z-front).
        resample (bool): If needs densification using Blender's
            IK engine.
        path_skel (str): Path to the input skeleton file; used to name
            the filtered and interpolated versions.
    Returns:
        skel_ours (Skeleton):
        skel_ours_2d (Skeleton):
        intrinsics (np.ndarray):
        frame_ids_filled_in (set):
    """
    assert camera_name is not None and isinstance(camera_name, str), \
        "Need a camera name"

    if shape_orig is None:
        shape_orig = (np.float32(1080.), np.float32(1920.))
    np.set_printoptions(linewidth=200, suppress=True)

    path_intrinsics = os.path.join(path_scene, "intrinsics.json")
    if os.path.exists(path_intrinsics):
        lg.warning("Loading existing intrinsics matrix!")
        K = np.array(json.load(open(path_intrinsics, 'r')),
                     dtype=np.float32)
        scale = (shape_orig[1]
                 / int(round(shape_orig[1] * float(INPUT_SIZE)
                             / shape_orig[0])),
                 shape_orig[0] / float(INPUT_SIZE))
        K[0, 0] /= scale[0]
        K[0, 2] /= scale[0]
        K[1, 1] /= scale[1]
        K[1, 2] /= scale[1]
    else:
        K = intrinsics_matrix(INPUT_SIZE, shape_orig, camera_name)
        focal_correction = Conf.get().optimize_path.focal_correction
        if abs(focal_correction - 1.) > 1.e-3:
            lg.warning("Warning, scaling intrinsics matrix by %f"
                       % focal_correction)
            K[0, 0] *= focal_correction
            K[1, 1] *= focal_correction

    #
    # Prune poses
    #

    skel_ours_2d, frame_ids_removed = filter_outliers(
        skel_ours_2d, winsorize_limit=winsorize_limit, show=False)

    frames_to_remove_3d = filter_wrong_poses(skel_ours_2d, skel_ours)
    for frame_id in skel_ours.get_frames():
        if frame_id in frames_to_remove_3d:
            skel_ours.remove_pose(frame_id)

    # resample skeleton to fill in missing frames
    skel_ours_old = skel_ours
    frame_ids_filled_in = set(skel_ours_2d.get_frames()).difference(
        set(skel_ours_old.get_frames()))
    if resample:
        lg.warning("Resampling BEFORE optimization")
        # Aron on 6/4/2018
        sclt_ours = Scenelet(skeleton=skel_ours)
        stem = os.path.splitext(path_skel)[0]
        path_filtered = "%s_filtered.json" % stem
        path_ipoled = "%s_ikipol.json" \
                      % os.path.splitext(path_filtered)[0]
        if not os.path.exists(path_ipoled):
            sclt_ours.save(path_filtered)
            script_filepath = os.path.normpath(os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                os.pardir, 'blender', 'ipol_ik.py'))
            assert os.path.exists(script_filepath), \
                "No: %s" % script_filepath
            blender_path = os.environ.get('BLENDER')
            if blender_path is None \
                    or not os.path.isfile(blender_path):
                raise RuntimeError(
                    "Need \"BLENDER\" environment variable to be set "
                    "to the blender executable")
            cmd_params = [blender_path, '-noaudio', '-b', '-P',
                          script_filepath, '--', path_filtered]
            print("calling %s" % " ".join(cmd_params))
            ret = check_call(cmd_params)
            print("ret: %s" % ret)
        else:
            lg.warning("\n\n\tNOT recomputing IK interpolation, "
                       "file found at %s!\n" % path_ipoled)
        skel_ours = Scenelet.load(path_ipoled, no_obj=True).skeleton

        # remove extra frames at ends and beginnings of actors
        spans = skel_ours_old.get_actor_empty_frames()
        old_frames = skel_ours_old.get_frames()
        frames_to_remove = []
        for frame_id in skel_ours.get_frames():
            if frame_id not in old_frames:
                in_spans = next(
                    (True for span in spans
                     if span[0] < frame_id < span[1]),
                    None)
                if in_spans:
                    frames_to_remove.append(frame_id)
        for frame_id in frames_to_remove:
            skel_ours.remove_pose(frame_id)

    # keep the 2D and 3D skeletons in sync
    for frame_id in skel_ours_2d.get_frames():
        if not skel_ours.has_pose(frame_id):
            skel_ours_2d.remove_pose(frame_id)
    for frame_id in skel_ours.get_frames():
        if not skel_ours_2d.has_pose(frame_id):
            skel_ours.remove_pose(frame_id)
    frames_set_ours = set(skel_ours.get_frames())
    frames_set_2d = set(skel_ours_2d.get_frames())
    if frames_set_ours != frames_set_2d:
        print("Frame mismatch: %s"
              % frames_set_ours.difference(frames_set_2d))

    lg.warning("Removing pelvis and neck from 2D input")
    for frame_id in skel_ours_2d.get_frames():
        skel_ours_2d.set_visible(frame_id, Joint.PELV, 0)
        skel_ours_2d.set_visible(frame_id, Joint.NECK, 0)

    return skel_ours, skel_ours_2d, K, frame_ids_filled_in
def extract_annotated_scenelet(
        scene,
        prefix_obj='obb',
        frame_ids=None,
        frame_multiplier=1.,
        time_multiplier=1.,
        f_ob_is_joint=lambda ob: ob.name.startswith('Output')
                                 and ob.name.endswith('Sphere'),
        f_joint_name_from_ob=lambda ob: ob.name.split('.')[1]):
    """
    Args:
        scene (bpy.types.Scene): The current scene
            (e.g. bpy.context.scene).
        prefix_obj (str): Start of object names that we want to include
            in the scenelet as oriented bounding boxes.
        frame_ids (List[int]): A subset of frame IDs to export.
        frame_multiplier (float): Scaling for frame IDs. The result will
            be rounded and truncated:
            output.frame_id := int(round(frame_id * frame_multiplier)).
        time_multiplier (float): Scaling for times associated with
            frame_ids:
            output.time := int(round(frame_id * frame_multiplier))
                           * time_multiplier.
        f_ob_is_joint (Callable[[bpy.types.Object], bool]): Decides if a
            Blender object is a joint.
        f_joint_name_from_ob (Callable[[bpy.types.Object], str]): Gets
            the joint name from the Blender object name.
    Returns:
        A Scenelet with the exported skeleton and oriented bounding
        boxes.
    """
    joints = {
        f_joint_name_from_ob(ob): ob
        for ob in bpy.data.objects
        if f_ob_is_joint(ob)
    }
    print("joints: %s" % joints)
    skeleton = Skeleton()
    if len(joints):
        assert len(joints) == 16, "No: %s" % len(joints)
        if not frame_ids:
            frame_ids = range(scene.frame_start, scene.frame_end + 1)
        for frame_id in frame_ids:
            o_frame_id = int(round(frame_id * frame_multiplier))
            if skeleton.has_pose(o_frame_id):
                print("skipping %s" % frame_id)
                continue
            print("frame_id: %s" % frame_id)
            scene.frame_set(frame_id)
            bpy.context.scene.update()
            pose = np.zeros(shape=(3, len(joints)))
            for joint, ob in joints.items():
                pos = ob.matrix_world.col[3]
                print("pos[%s]: %s" % (ob.name, pos))
                joint_id = Joint.from_string(joint)
                print("joint %s is %s" % (joint, Joint(joint_id)))
                pose[:, joint_id] = from_blender(pos)
            print("o_frame: %s from %s" % (o_frame_id, frame_id))
            assert not skeleton.has_pose(o_frame_id), \
                "Already has %s" % frame_id
            skeleton.set_pose(frame_id=o_frame_id, pose=pose,
                              time=o_frame_id * time_multiplier)

    objs_bl = {}
    for obj in bpy.data.objects:
        if obj.name.startswith(prefix_obj) and not obj.hide:
            obj_id = int(obj.name.split('_')[1])
            try:
                objs_bl[obj_id].append(obj)
            except KeyError:
                objs_bl[obj_id] = [obj]
    print("objs: %s" % objs_bl)

    scenelet = Scenelet(skeleton=skeleton)
    print("scenelet: %s" % scenelet)
    for obj_id, parts_bl in objs_bl.items():
        name_category = None
        scene_obj = None
        for part_id, part_bl in enumerate(parts_bl):
            transl, rot, scale = part_bl.matrix_world.decompose()
            rot = rot.to_matrix()
            if any(comp < 0. for comp in scale):
                scale *= -1.
                rot *= -1.
            assert not any(comp < 0. for comp in scale), \
                "No: %s" % scale
            matrix_world = part_bl.matrix_world.copy()
            # need to save full scale, not only half-axes
            for c in range(3):
                for r in range(3):
                    matrix_world[r][c] *= 2.
            name_parts = part_bl.name.split('_')
            if name_category is None:
                name_category = name_parts[2]
                scene_obj = SceneObj(label=name_category)
            else:
                assert name_category == name_parts[2], \
                    "No: %s %s" % (name_category, name_parts[2])
            name_part = name_parts[3]
            print("part: %s" % name_part)
            part = SceneObjPart(name_part)
            part.obb = Obb(
                centroid=np.array(
                    from_blender([transl[0], transl[1], transl[2]])),
                axes=np.array(
                    [[rot[0][0], rot[0][1], rot[0][2]],
                     [-rot[2][0], -rot[2][1], -rot[2][2]],
                     [rot[1][0], rot[1][1], rot[1][2]]]),
                scales=np.array(
                    [scale[0] * 2., scale[1] * 2., scale[2] * 2.]))
            print("obb: %s" % part.obb.to_json(0))
            scene_obj.add_part(part_id, part)
        scenelet.add_object(obj_id, scene_obj, clone=False)
    return scenelet
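# `from_blender` converts Blender's z-up coordinates into the scenelet
# frame. Judging only from the OBB axis remapping above (rows taken as
# x, -z, y), a consistent sketch would be the function below; this is
# stated as an assumption, not as the project's definition.
def _from_blender_sketch(pos):
    """Plausible Blender (z-up) to scenelet coordinate conversion."""
    return np.array([pos[0], -pos[2], pos[1]])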
def main(argv):
    parser = argparse.ArgumentParser(
        "Filter initial path based on distance to full fit")
    parser.add_argument('skel', help="Skeleton file to filter", type=str)
    parser.add_argument('--threshold',
                        help='Distance threshold. Default: 0.4',
                        type=float, default=0.4)
    args = parser.parse_args(argv)

    lower_body = [Joint.LKNE, Joint.RKNE, Joint.LANK, Joint.RANK,
                  Joint.LHIP, Joint.RHIP]

    print(args.skel)
    p_root = os.path.dirname(args.skel)
    p_fit = os.path.join(p_root, 'opt1')
    assert os.path.isdir(p_fit), p_fit

    query = Scenelet.load(args.skel)
    out = Skeleton()
    data = []
    x = []
    y = []
    y2 = []
    for d_ in sorted(os.listdir(p_fit)):
        d = os.path.join(p_fit, d_)
        pattern = os.path.join(d, 'skel_*.json')
        for f in sorted(glob.iglob(pattern)):
            print(f)
            assert '00' in f, f
            sclt = Scenelet.load(f)
            frames = sclt.skeleton.get_frames()
            mid_frame = frames[len(frames) // 2]
            time = sclt.skeleton.get_time(mid_frame)
            q_frame_id = query.skeleton.find_time(time)
            q_time = query.skeleton.get_time(q_frame_id)
            print(time, q_time, f)
            q_pose = query.skeleton.get_pose(q_frame_id)
            pose = sclt.skeleton.get_pose(mid_frame)
            # translate the match's pelvis onto the query's (x, z only)
            pose[[0, 2]] -= \
                (pose[:, Joint.PELV:Joint.PELV + 1]
                 - q_pose[:, Joint.PELV:Joint.PELV + 1])[[0, 2]]
            diff = np.mean(
                np.linalg.norm(q_pose[:, lower_body]
                               - pose[:, lower_body], axis=0))
            print(q_frame_id, time, diff)
            y.append(diff)
            x.append(q_frame_id)
            data.append((q_frame_id, diff, time))

            if query.skeleton.has_pose(q_frame_id - 1):
                tmp_pose = copy.deepcopy(q_pose)
                tmp_pose -= \
                    tmp_pose[:, Joint.PELV:Joint.PELV + 1] \
                    - query.skeleton.get_pose(
                        q_frame_id - 1)[:, Joint.PELV:Joint.PELV + 1]
                y2.append(
                    np.mean(
                        np.linalg.norm(pose[:, lower_body]
                                       - tmp_pose[:, lower_body],
                                       axis=0)))
            else:
                y2.append(0.)
            out.set_pose(frame_id=q_frame_id, time=q_time, pose=pose)
            break

    data = smooth(data)
    plt.plot(x, y, 'x--',
             label='Distance to best Kinect fit\'s center frame')
    plt.plot(x, y2, 'o--', label='Distance to prev pose')
    plt.plot([d[0] for d in data], [d[1] for d in data], 'o--',
             label='Smoothed')
    plt.xlabel('Query frame id')
    plt.ylabel('Mean lower-body joint distance')
    plt.legend()
    plt.savefig(os.path.join(p_root, 'tmp.pdf'))
    Scenelet(skeleton=out).save(os.path.join(p_root, 'skel_tmp.json'))

    above = []
    for frame_id, dist, time in data:
        if dist > args.threshold:
            above.append(Span2(start=frame_id, end=frame_id,
                               value=dist, time=time))

    # merge consecutive frames into spans
    spans = [copy.deepcopy(above[0])]
    it = iter(above)
    next(it)
    prev_frame_id = above[0].start
    for span2 in it:
        frame_id = span2.start
        if prev_frame_id + 1 < frame_id:
            # close the current span and start a new one
            spans[-1].end = prev_frame_id
            spans.append(Span2(start=frame_id, end=frame_id,
                               time=None, value=span2.value))
        else:
            print(prev_frame_id, frame_id)
        prev_frame_id = frame_id
    spans[-1].end = prev_frame_id

    print("Need replacement: {}".format(above))
    print("Need replacement2: {}".format(spans))
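# `smooth` (imported from elsewhere) smooths the (frame_id, dist, time)
# tuples before thresholding. A stand-in with a simple centered moving
# average over the distances (the window radius is a guess) could be:
def _smooth_sketch(data, radius=2):
    """Assumed behavior of smooth(); not the real implementation."""
    out = []
    for i, (frame_id, dist, time) in enumerate(data):
        lo = max(0, i - radius)
        window = [d[1] for d in data[lo:i + radius + 1]]
        out.append((frame_id, sum(window) / len(window), time))
    return out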
""" Setup: conda create --name iMapper python=3 numpy -y \ conda activate iMapper Usage: export PYTHONPATH=$(pwd); python3 example.py """ from imapper.logic.scenelet import Scenelet if __name__ == '__main__': scenelet = Scenelet.load('i3DB/Scene04/gt/skel_lobby19-3_GT.json') skeleton = scenelet.skeleton print('Have {} poses and {} objects' .format(len(skeleton.get_frames()), len(scenelet.objects)))
def work_scene(p_root, plot_fun_name, name_scene, fname):
    pjoin = os.path.join
    name_scene = os.path.split(p_root)[-1]

    p_actions = os.path.join(p_root, 'gt', 'actions.txt')
    actions = None
    # if os.path.isfile(p_actions):
    #     with open(p_actions, 'r') as f:
    #         actions = [line.strip().split('\t')
    #                    for line in f.readlines()]
    #         actions = {int(line[0]): line[1] for line in actions}

    p_quant2 = pjoin(p_root, _QUANT_FOLDER)
    if not os.path.isdir(p_quant2):
        os.makedirs(p_quant2)
    p_methods = []

    #
    # Tome3D
    #

    pfixes = [('Tome3D-nosmooth', '0')]  # ('Tome3D-smooth', '10')
    for postfix, smooth in pfixes:
        p_tome3d = pjoin(p_quant2,
                         'skel_{}_{}.json'.format(name_scene, postfix))
        if os.path.isfile(p_tome3d):
            p_methods.append(NamedSolution('\\tomethreed', p_tome3d))
        else:
            print('Can\'t find Tome3D at {}'.format(p_tome3d))

    #
    # LCR-Net 3D
    #

    postfix = 'LCRNet3D-nosmooth'
    p_lcrnet3d = pjoin(p_quant2,
                       'skel_{}_{}.json'.format(name_scene, postfix))
    if os.path.isfile(p_lcrnet3d):
        p_methods.append(
            NamedSolution('\\lcrnetthreed$_{no smooth}$', p_lcrnet3d))
    else:
        print('Can\'t find LCRNet3D at {}'.format(p_lcrnet3d))

    #
    # GT
    #

    p_gt = pjoin(p_root, 'gt', 'skel_{}_GT.json'.format(name_scene))
    assert os.path.isfile(p_gt), 'Need gt file: {}'.format(p_gt)
    sclt_gt = Scenelet.load(p_gt)
    p_gt_2d = pjoin(p_root, 'gt', 'skel_GT_2d.json')
    sclt_gt_2d = Scenelet.load(p_gt_2d)

    #
    # Evaluate
    #

    p_methods.append(NamedSolution('\\name',
                                   pjoin(p_root, 'output', fname)))
    # Append your solution here
    p_methods.append(
        NamedSolution('NewMethod', pjoin(p_root, 'NewMethod', fname)))

    stats = StatsOverTime(plot_fun_name=plot_fun_name,
                          name_scene=name_scene)
    errors = OrderedDict()
    for named_solution in p_methods:
        # evaluate on every second frame
        frame_ids = [fid for fid in sclt_gt.skeleton.get_frames()
                     if not fid % 2]
        errors_ = evaluate(named_solution=named_solution,
                           sclt_gt=sclt_gt,
                           sclt_gt_2d=sclt_gt_2d,
                           frame_ids=frame_ids,
                           path_scene=p_root,
                           stats=stats,
                           actions=actions)
        errors[named_solution.name_method] = errors_
    series = stats.paint(path_dest=os.path.join(p_root, 'debug_eval'))
    return errors, series
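# `NamedSolution` pairs a LaTeX-ready method name with the path of its
# skeleton estimate. A namedtuple sketch with the two fields work_scene()
# relies on follows; `name_method` is accessed above, while the name of
# the path field is an assumption.
#
#     from collections import namedtuple
#     NamedSolution = namedtuple('NamedSolution',
#                                ['name_method', 'path_scenelet'])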