def __init__(self, img_emb_size, action_size, args, use_bb_in_input=True,
             n_classes=0, use_spatial_softmax=True):
    """Create the MLPs of the bounding-box-only model.

    Args:
        img_emb_size: Not read by this constructor.
        action_size: Added to the bounding-box size to give the width of
            the first MLP layer.
        args: Config object; ``use_bb_in_input`` and ``loss_type`` are read.
        use_bb_in_input: Not read by this constructor;
            ``args.use_bb_in_input`` is consulted instead.
        n_classes: Output width of the classification head. Must be > 0
            unless ``args.loss_type == 'regr'``.
        use_spatial_softmax: Not read by this constructor.

    Raises:
        AssertionError: If bounding-box input is disabled, or if the
            classification head is requested with ``n_classes <= 0``.
    """
    super(BoundingBoxOnlyModel, self).__init__()

    # Per the original note: 6 values for the other object's pose (6D)
    # and 12 for both the anchor and the other bounding box.
    if args.use_bb_in_input:
        bb_size = 6 + 12
    else:
        bb_size = 0
    # Bounding-box input is required by this constructor.
    assert bb_size > 0

    trunk_widths = [bb_size + action_size, 512, 256, 64]
    self.bb_action_model = build_mlp(trunk_widths, activation='relu')

    if args.loss_type == 'regr':
        # Regression head: 6 outputs, no non-linearity on the last layer.
        self.delta_pose_model = build_mlp(
            [64, 6], activation='relu', final_nonlinearity=False)
        print("Using regression model.")
    else:
        assert n_classes > 0, "Invalid number of classes."
        print("Using classif model with {} classes".format(n_classes))
        self.delta_pose_model = build_mlp(
            [64, n_classes], activation='relu', final_nonlinearity=False)
def __init__(self, img_emb_size, action_size, args, use_bb_in_input=True,
             n_classes=0, use_spatial_softmax=True):
    """Create the voxel encoder, optional spatial-softmax encoder and MLPs.

    Args:
        img_emb_size: Embedding width; selects the voxel encoder class
            (``CML_3`` when <= 36, otherwise ``CML_2``) and sizes the MLPs.
        action_size: Added to the input width of ``img_action_model``.
        args: Config object; ``add_xy_channels``, ``use_bb_in_input`` and
            ``loss_type`` are read. Also stored on ``self.args``.
        use_bb_in_input: Not read by this constructor;
            ``args.use_bb_in_input`` is consulted instead.
        n_classes: Output width of the classification head. Must be > 0
            unless ``args.loss_type == 'regr'``.
        use_spatial_softmax: When true, a ``Spatial3DSoftmaxImageEncoder``
            is created and its 96 features are added to the MLP input.

    Raises:
        ValueError: If ``args.add_xy_channels`` is neither 0 nor 1.
        AssertionError: If the classification head is requested with
            ``n_classes <= 0``.
    """
    super(UnscaledVoxelModel, self).__init__()

    self.img_emb_size = img_emb_size
    self.action_size = action_size
    self.use_spatial_softmax = use_spatial_softmax
    self.args = args

    # Input channel count: 3 without extra channels, 6 with them.
    xy_mode = args.add_xy_channels
    if xy_mode not in (0, 1):
        raise ValueError("Invalid add_xy_channels")
    input_nc = 3 if xy_mode == 0 else 6

    encoder_cls = CML_3 if img_emb_size <= 36 else CML_2
    self.voxels_to_rel_emb_model = encoder_cls(input_nc)

    z_ss_size = 0
    if use_spatial_softmax:
        z_ss_size = 96
        self.spatial_softmax_encoder = Spatial3DSoftmaxImageEncoder(
            input_nc, z_ss_size)

    # Per the original note: 6 values for the other object's pose (6D)
    # and 12 for both the anchor and the other bounding box.
    bb_size = (6 + 12) if args.use_bb_in_input else 0

    half_emb = img_emb_size // 2
    quarter_emb = img_emb_size // 4
    trunk_in = img_emb_size + bb_size + action_size + z_ss_size
    self.img_action_model = build_mlp(
        [trunk_in, half_emb, half_emb, quarter_emb],
        activation='relu',
    )

    if args.loss_type == 'regr':
        # Regression head: 6 outputs, no non-linearity on the last layer.
        self.delta_pose_model = build_mlp(
            [quarter_emb, 64, 6],
            activation='relu',
            final_nonlinearity=False,
        )
        print("Using regression model.")
    else:
        assert n_classes > 0, "Invalid number of classes."
        print("Using classif model with {} classes".format(n_classes))
        self.delta_pose_model = build_mlp(
            [quarter_emb, 64, n_classes],
            activation='relu',
            final_nonlinearity=False,
        )
def __init__(self, resnet_klass, img_emb_size, action_size, args,
             use_bb_in_input=True, n_classes=0, use_spatial_softmax=True):
    """Create the ResNet voxel encoder and the MLPs that follow it.

    Args:
        resnet_klass: Callable invoked as
            ``resnet_klass(emb_size=..., input_channels=...)`` to build
            the encoder stored on ``self.resnet``.
        img_emb_size: Embedding width passed to the encoder; also sizes
            the MLPs.
        action_size: Added to the input width of ``img_action_model``.
        args: Config object; ``add_xy_channels``, ``use_bb_in_input`` and
            ``loss_type`` are read. Also stored on ``self.args``.
        use_bb_in_input: Not read by this constructor;
            ``args.use_bb_in_input`` is consulted instead.
        n_classes: Output width of the classification head. Must be > 0
            unless ``args.loss_type == 'regr'``.
        use_spatial_softmax: Only stored on the instance; no spatial
            softmax encoder is built here.

    Raises:
        ValueError: If ``args.add_xy_channels`` is neither 0 nor 1.
        AssertionError: If the classification head is requested with
            ``n_classes <= 0``.
    """
    super(UnscaledResNetVoxelModel, self).__init__()

    self.img_emb_size = img_emb_size
    self.action_size = action_size
    self.use_spatial_softmax = use_spatial_softmax
    self.args = args

    # Input channel count: 3 without extra channels, 6 with them.
    xy_mode = args.add_xy_channels
    if xy_mode not in (0, 1):
        raise ValueError("Invalid add_xy_channels")
    input_nc = 3 if xy_mode == 0 else 6

    self.resnet = resnet_klass(
        emb_size=img_emb_size,
        input_channels=input_nc,
    )

    # No spatial-softmax features contribute to the MLP input here.
    z_ss_size = 0
    bb_size = (6 + 12) if args.use_bb_in_input else 0

    quarter_emb = img_emb_size // 4
    trunk_in = img_emb_size + bb_size + action_size + z_ss_size
    self.img_action_model = build_mlp(
        [trunk_in, img_emb_size // 2, quarter_emb],
        activation='relu',
        final_nonlinearity=True,
    )

    if args.loss_type == 'regr':
        # Regression head: 6 outputs, no non-linearity on the last layer.
        self.delta_pose_model = build_mlp(
            [quarter_emb, 6],
            activation='relu',
            final_nonlinearity=False,
        )
        print("Using regression model.")
    else:
        assert n_classes > 0, "Invalid number of classes."
        print("Using classif model with {} classes".format(n_classes))
        self.delta_pose_model = build_mlp(
            [quarter_emb, n_classes],
            activation='relu',
            final_nonlinearity=False,
        )
def __init__(self, img_emb_size, args, use_bb_in_input=True, n_classes=0,
             use_spatial_softmax=True):
    """Create the voxel encoder and the single-output prediction MLP.

    Args:
        img_emb_size: Embedding width; contributes to the MLP input width.
        args: Config object; ``add_xy_channels`` and ``use_bb_in_input``
            are read. Also stored on ``self.args``.
        use_bb_in_input: Not read by this constructor;
            ``args.use_bb_in_input`` is consulted instead.
        n_classes: Not read by this constructor.
        use_spatial_softmax: When true, a ``Spatial3DSoftmaxImageEncoder``
            is created and its 96 features are added to the MLP input.

    Raises:
        ValueError: If ``args.add_xy_channels`` is neither 0 nor 1.
    """
    super(UnscaledPrecondVoxelModel, self).__init__()

    self.img_emb_size = img_emb_size
    self.use_spatial_softmax = use_spatial_softmax
    self.args = args

    # Input channel count: 3 without extra channels, 6 with them.
    xy_mode = args.add_xy_channels
    if xy_mode not in (0, 1):
        raise ValueError("Invalid add_xy_channels")
    input_nc = 3 if xy_mode == 0 else 6

    self.voxels_to_rel_emb_model = CML_2(input_nc)

    z_ss_size = 0
    if use_spatial_softmax:
        z_ss_size = 96
        self.spatial_softmax_encoder = Spatial3DSoftmaxImageEncoder(
            input_nc, z_ss_size)

    # Only 12 bounding-box values are used here (no extra 6D pose term).
    if args.use_bb_in_input:
        bb_size = 12
    else:
        bb_size = 0

    pred_widths = [img_emb_size + bb_size + z_ss_size, 256, 64, 1]
    self.img_emb_pred_model = build_mlp(
        pred_widths,
        activation='relu',
        final_nonlinearity=False,
    )
def __init__(self, img_emb_size, action_size, args, use_bb_in_input=True):
    """Create the image encoders and the forward/inverse MLPs.

    Args:
        img_emb_size: Embedding width passed to ``ConvImageEncoder``;
            also sizes the MLPs.
        action_size: Added to the input width of ``img_action_model``.
        args: Config object; ``add_xy_channels`` and ``use_bb_in_input``
            are read. Also stored on ``self.args``.
        use_bb_in_input: Not read by this constructor;
            ``args.use_bb_in_input`` is consulted instead.

    Raises:
        ValueError: If ``args.add_xy_channels`` is not 0, 1 or 2.
    """
    super(SimpleModel, self).__init__()
    self.img_emb_size = img_emb_size
    self.action_size = action_size
    self.args = args

    if args.add_xy_channels == 0:
        input_nc = 3
    elif args.add_xy_channels in (1, 2):
        input_nc = 5
    else:
        # Without this branch an unsupported value left `input_nc` unbound
        # and surfaced as an UnboundLocalError on the next line.
        raise ValueError("Invalid add_xy_channels")

    self.img_to_rel_emb_model = ConvImageEncoder(
        input_nc, img_emb_size, inp_size=256)

    z_ss_size = 128
    self.spatial_softmax_encoder = SpatialSoftmaxImageEncoder(
        input_nc, z_ss_size)
    # self.img_to_rel_emb_model = ResnetGenerator_Encoder(
    #     input_nc=input_nc,
    #     output_nc=3,
    #     n_blocks=1,
    #     ngf=2,
    #     n_downsampling=8,
    #     final_channels=img_emb_size//2)

    bb_size = 8 if args.use_bb_in_input else 0

    self.img_action_model = build_mlp(
        [
            img_emb_size + bb_size + action_size + z_ss_size,
            256,
            256,
            img_emb_size,
        ],
        activation='relu',
        final_nonlinearity=True,
    )
    # 4 outputs, no non-linearity on the last layer.
    self.delta_pose_model = build_mlp(
        [img_emb_size, 128, 4],
        activation='relu',
        final_nonlinearity=False,
    )
    # Input is an embedding plus 4 extra values; output width matches
    # the embedding plus the spatial-softmax features.
    self.inv_model = build_mlp(
        [img_emb_size + 4, 256, 256, img_emb_size + z_ss_size])
def __init__(self, img_emb_size, action_size, args, use_bb_in_input=True,
             n_classes=0, use_spatial_softmax=True):
    """Create the small-embedding voxel encoder and the output MLP.

    Args:
        img_emb_size: Embedding width passed to ``CML_4``; contributes to
            the MLP input width.
        action_size: Added to the MLP input width.
        args: Config object; ``add_xy_channels``, ``use_bb_in_input`` and
            ``loss_type`` are read. Also stored on ``self.args``.
        use_bb_in_input: Not read by this constructor;
            ``args.use_bb_in_input`` is consulted instead.
        n_classes: Output width in classification mode. Must be > 0
            unless ``args.loss_type == 'regr'``.
        use_spatial_softmax: When true, a ``Spatial3DSoftmaxImageEncoder``
            is created and its 96 features are added to the MLP input.

    Raises:
        ValueError: If ``args.add_xy_channels`` is neither 0 nor 1.
        AssertionError: If classification is requested with
            ``n_classes <= 0``.
    """
    super(SmallEmbUnscaledVoxelModel, self).__init__()

    self.img_emb_size = img_emb_size
    self.action_size = action_size
    self.use_spatial_softmax = use_spatial_softmax
    self.args = args

    # Input channel count: 3 without extra channels, 6 with them.
    xy_mode = args.add_xy_channels
    if xy_mode not in (0, 1):
        raise ValueError("Invalid add_xy_channels")
    input_nc = 3 if xy_mode == 0 else 6

    # Why do we need an extra +3 ?
    # self.voxels_to_rel_emb_model = CML_4(input_nc, img_emb_size+3, out_emb=None)
    self.voxels_to_rel_emb_model = CML_4(
        input_nc, img_emb_size, out_emb=None)

    z_ss_size = 0
    if use_spatial_softmax:
        z_ss_size = 96
        self.spatial_softmax_encoder = Spatial3DSoftmaxImageEncoder(
            input_nc, z_ss_size)

    # Per the original note: 6 values for the other object's pose (6D)
    # and 12 for both the anchor and the other bounding box.
    bb_size = (6 + 12) if args.use_bb_in_input else 0

    in_width = img_emb_size + bb_size + action_size + z_ss_size
    is_regression = args.loss_type == 'regr'
    out_width = 6 if is_regression else n_classes

    # The quarter-width hidden layer is only added when it would have at
    # least 32 units.
    layer_widths = [in_width, in_width // 2]
    if (in_width // 4) >= 32:
        layer_widths.append(in_width // 4)
    layer_widths.append(out_width)

    if is_regression:
        self.delta_pose_model = build_mlp(
            layer_widths,
            activation='relu',
            final_nonlinearity=False,
        )
        print("Using regression model.")
    else:
        assert n_classes > 0, "Invalid number of classes."
        print("Using classif model with {} classes".format(n_classes))
        self.delta_pose_model = build_mlp(
            layer_widths,
            activation='relu',
            final_nonlinearity=False,
        )
def __init__(self, img_emb_size, action_size, args, use_bb_in_input=True,
             use_spatial_softmax=True):
    """Create the voxel encoder, shared MLP and position/orientation heads.

    Args:
        img_emb_size: Embedding width; sizes the shared MLP and the heads.
        action_size: Added to the input width of ``img_action_model``.
        args: Config object; ``add_xy_channels``, ``use_bb_in_input``,
            ``pos_loss_type``, ``orient_loss_type`` and, in classification
            mode, ``pos_classif_num_classes`` /
            ``orient_classif_num_classes`` are read. Also stored on
            ``self.args``.
        use_bb_in_input: Not read by this constructor;
            ``args.use_bb_in_input`` is consulted instead.
        use_spatial_softmax: When true, a ``Spatial3DSoftmaxImageEncoder``
            is created and its 96 features are added to the MLP input.

    Raises:
        ValueError: If ``args.add_xy_channels`` is neither 0 nor 1.
        AssertionError: If a classification head is requested with a
            non-positive class count.
    """
    super(UnscaledClassifVoxelModel, self).__init__()

    self.img_emb_size = img_emb_size
    self.action_size = action_size
    self.use_spatial_softmax = use_spatial_softmax
    self.args = args

    # Input channel count: 3 without extra channels, 6 with them.
    xy_mode = args.add_xy_channels
    if xy_mode not in (0, 1):
        raise ValueError("Invalid add_xy_channels")
    input_nc = 3 if xy_mode == 0 else 6

    self.voxels_to_rel_emb_model = CML_2(input_nc)

    z_ss_size = 0
    if use_spatial_softmax:
        z_ss_size = 96
        self.spatial_softmax_encoder = Spatial3DSoftmaxImageEncoder(
            input_nc, z_ss_size)

    # Per the original note: 6 values for the other object's pose (6D)
    # and 12 for both the anchor and the other bounding box.
    bb_size = (6 + 12) if args.use_bb_in_input else 0

    trunk_in = img_emb_size + bb_size + action_size + z_ss_size
    self.img_action_model = build_mlp(
        [trunk_in, img_emb_size // 2, img_emb_size // 2, img_emb_size // 4],
        activation='relu',
    )

    def make_head(out_width):
        # Every head has the same shape; a fresh layer list is built for
        # each call.
        return build_mlp(
            [img_emb_size // 4, 64, out_width],
            activation='relu',
            final_nonlinearity=False,
        )

    # Position: one 3-output regression head, or one classifier per axis.
    if args.pos_loss_type == 'regr':
        self.delta_pose_model = make_head(3)
        print("Using regression model.")
    else:
        n_pos_classes = args.pos_classif_num_classes
        assert n_pos_classes > 0
        self.delta_pos_x = make_head(n_pos_classes)
        self.delta_pos_y = make_head(n_pos_classes)
        self.delta_pos_z = make_head(n_pos_classes)

    # Orientation: same choice, driven by its own loss type.
    if args.orient_loss_type == 'regr':
        self.delta_orient_model = make_head(3)
        print("Using regression model.")
    else:
        n_orient_classes = args.orient_classif_num_classes
        assert n_orient_classes > 0
        self.delta_orient_x = make_head(n_orient_classes)
        self.delta_orient_y = make_head(n_orient_classes)
        self.delta_orient_z = make_head(n_orient_classes)
def __init__(self, resnet_klass, img_emb_size, action_size, args,
             use_bb_in_input=True, use_spatial_softmax=True):
    """Create the ResNet encoder, shared MLP and position/orientation heads.

    Args:
        resnet_klass: Callable invoked as
            ``resnet_klass(emb_size=..., input_channels=...)`` to build
            the encoder stored on ``self.resnet``.
        img_emb_size: Embedding width passed to the encoder; also sizes
            the shared MLP and the heads.
        action_size: Added to the input width of ``img_action_model``.
        args: Config object; ``add_xy_channels``, ``use_bb_in_input``,
            ``pos_loss_type``, ``orient_loss_type`` and, in classification
            mode, ``pos_classif_num_classes`` /
            ``orient_classif_num_classes`` are read. Also stored on
            ``self.args``.
        use_bb_in_input: Not read by this constructor;
            ``args.use_bb_in_input`` is consulted instead.
        use_spatial_softmax: Only stored on the instance; no spatial
            softmax encoder is built here.

    Raises:
        ValueError: If ``args.add_xy_channels`` is neither 0 nor 1.
        AssertionError: If a classification head is requested with a
            non-positive class count.
    """
    super(UnscaledClassifResNetVoxelModel, self).__init__()

    self.img_emb_size = img_emb_size
    self.action_size = action_size
    self.use_spatial_softmax = use_spatial_softmax
    self.args = args

    # Input channel count: 3 without extra channels, 6 with them.
    xy_mode = args.add_xy_channels
    if xy_mode not in (0, 1):
        raise ValueError("Invalid add_xy_channels")
    input_nc = 3 if xy_mode == 0 else 6

    self.resnet = resnet_klass(
        emb_size=img_emb_size,
        input_channels=input_nc,
    )

    # No spatial-softmax features contribute to the MLP input here.
    z_ss_size = 0
    bb_size = (6 + 12) if args.use_bb_in_input else 0

    trunk_in = img_emb_size + bb_size + action_size + z_ss_size
    self.img_action_model = build_mlp(
        [trunk_in, img_emb_size // 2, img_emb_size // 4],
        activation='relu',
        final_nonlinearity=True,
    )

    def make_head(out_width):
        # Every head has the same shape; a fresh layer list is built for
        # each call.
        return build_mlp(
            [img_emb_size // 4, 64, out_width],
            activation='relu',
            final_nonlinearity=False,
        )

    # Position: one 3-output regression head, or one classifier per axis.
    if args.pos_loss_type == 'regr':
        self.delta_pose_model = make_head(3)
        print("Using regression model.")
    else:
        n_pos_classes = args.pos_classif_num_classes
        assert n_pos_classes > 0
        self.delta_pos_x = make_head(n_pos_classes)
        self.delta_pos_y = make_head(n_pos_classes)
        self.delta_pos_z = make_head(n_pos_classes)

    # Orientation: same choice, driven by its own loss type.
    if args.orient_loss_type == 'regr':
        self.delta_orient_model = make_head(3)
        print("Using regression model.")
    else:
        n_orient_classes = args.orient_classif_num_classes
        assert n_orient_classes > 0
        self.delta_orient_x = make_head(n_orient_classes)
        self.delta_orient_y = make_head(n_orient_classes)
        self.delta_orient_z = make_head(n_orient_classes)