def __init__(self, args):
    super(PredictInitPoseAndForce, self).__init__(args)
    self.environment_layer = EnvWHumanCpFiniteDiffFast
    self.loss_function = args.loss
    self.relu = nn.LeakyReLU()
    self.number_of_cp = args.number_of_cp
    self.environment = args.instance_environment
    self.sequence_length = args.sequence_length
    self.gpu_ids = args.gpu_ids

    # Frozen ResNet-18 backbone; the classification head is removed.
    self.feature_extractor = resnet18(pretrained=args.pretrain)
    del self.feature_extractor.fc
    self.feature_extractor.eval()

    self.image_feature_size = 512
    self.object_feature_size = 512
    self.hidden_size = 512
    self.num_layers = 3
    # self.input_feature_size = self.image_feature_size + self.object_feature_size
    self.input_feature_size = self.object_feature_size
    self.cp_feature_size = self.number_of_cp * 3

    self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)

    predict_initial_pose_size = torch.Tensor([(2 + 3) * 10, 100, 3 + 4])
    self.predict_initial_pose = input_embedding_net(
        predict_initial_pose_size.long().tolist(), dropout=args.dropout_ratio)

    input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
    self.input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)

    state_embed_size = torch.Tensor(
        [EnvState.total_size + self.cp_feature_size, 100, self.object_feature_size])
    self.state_embed = input_embedding_net(
        state_embed_size.long().tolist(), dropout=args.dropout_ratio)

    self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                hidden_size=self.hidden_size,
                                batch_first=True,
                                num_layers=self.num_layers)
    self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                    hidden_size=self.hidden_size)

    forces_directions_decoder_size = torch.Tensor(
        [self.hidden_size, 100, 3 * self.number_of_cp])
    self.forces_directions_decoder = input_embedding_net(
        forces_directions_decoder_size.long().tolist(), dropout=args.dropout_ratio)

    assert args.batch_size == 1, 'batch sizes > 1 are not implemented yet because of the environment'
    assert self.number_of_cp == 5  # five fingers

    self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
    if args.gpu_ids != -1:
        for obj, val in self.all_objects_keypoint_tensor.items():
            self.all_objects_keypoint_tensor[obj] = val.cuda()
        global DEFAULT_IMAGE_SIZE
        DEFAULT_IMAGE_SIZE = DEFAULT_IMAGE_SIZE.cuda()
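# NOTE: input_embedding_net is imported from elsewhere in this repo and its
# definition is not shown here. The constructors in this file use it as if it
# builds an MLP from an [in_size, hidden..., out_size] list. A minimal sketch
# under that assumption (hypothetical name, not the repo's implementation):
def _input_embedding_net_sketch(sizes, dropout=0.0):
    # e.g. sizes = [7, 100, 512] -> Linear(7, 100) -> ReLU -> Dropout -> Linear(100, 512)
    layers = []
    for i in range(len(sizes) - 1):
        layers.append(nn.Linear(sizes[i], sizes[i + 1]))
        if i < len(sizes) - 2:  # no activation/dropout after the final layer
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
    return nn.Sequential(*layers)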
def __init__(self, args):
    super(NoForceOnlyCPModel, self).__init__(args)
    self.environment_layer = EnvWHumanCpFiniteDiffFast
    self.loss_function = args.loss
    self.number_of_cp = args.number_of_cp
    self.environment = args.instance_environment
    self.sequence_length = args.sequence_length
    self.gpu_ids = args.gpu_ids

    feature_extractors = {'resnet18': resnet18, 'resnet50': resnet50}
    self.feature_extractor = feature_extractors[args.feature_extractor](
        pretrained=args.pretrain)
    del self.feature_extractor.fc
    self.feature_extractor.eval()
    if args.feature_extractor == 'resnet18':
        self.image_feature_size = 512
    else:
        self.image_feature_size = 2048

    self.object_feature_size = 512
    self.hidden_size = 512
    self.num_layers = 3
    self.input_feature_size = self.object_feature_size
    self.cp_feature_size = self.number_of_cp * 3

    self.image_embed = combine_block_w_do(self.image_feature_size, 64,
                                          args.dropout_ratio)

    input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
    self.input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)

    self.lstm_encoder = nn.LSTM(input_size=64 * 7 * 7 + 512,
                                hidden_size=self.hidden_size,
                                batch_first=True,
                                num_layers=self.num_layers)

    contact_point_decoder_size = torch.Tensor(
        [self.hidden_size, 100, 3 * self.number_of_cp])
    self.contact_point_decoder = input_embedding_net(
        contact_point_decoder_size.long().tolist(), dropout=args.dropout_ratio)

    assert self.number_of_cp == 5  # five fingers

    self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
    if args.gpu_ids != -1:
        for obj, val in self.all_objects_keypoint_tensor.items():
            self.all_objects_keypoint_tensor[obj] = val.cuda()
def __init__(self, args):
    super(CurrentMoveFromGazeImgModel, self).__init__(args)
    self.image_size = args.image_size
    self.imus = args.imus
    self.input_length = args.input_length
    self.output_length = args.output_length
    self.sequence_length = args.sequence_length
    self.num_classes = args.num_classes
    self.gpu_ids = args.gpu_ids
    self.base_lr = args.base_lr
    self.image_feature = args.image_feature
    self.hidden_size = args.hidden_size
    self.imu_embedding_size = 30
    self.loss_function = args.loss
    self.relu = nn.LeakyReLU()
    self.num_imus = args.num_imus

    self.feature_extractor = FeatureLearnerModule(args)
    self.embedding_input = nn.Linear(args.image_feature, args.hidden_size)
    self.input_feature_type = args.input_feature_type[0]

    imu_moves_unembed_size = torch.Tensor([self.hidden_size, 100, self.num_imus * 1])
    self.imu_moves_unembed = input_embedding_net(
        imu_moves_unembed_size.long().tolist(), dropout=args.dropout)

    gaze_embed_size = torch.Tensor([2, 100, self.hidden_size])
    self.gaze_embed = input_embedding_net(gaze_embed_size.long().tolist(),
                                          dropout=args.dropout)

    self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)
    self.lstm = nn.LSTM(64 * 7 * 7 + 512, self.hidden_size,
                        batch_first=True, num_layers=3)
    self.decoder_lstm = nn.LSTM(64 * 7 * 7 + 512, self.hidden_size,
                                batch_first=True, num_layers=3)

    assert self.input_length == self.sequence_length == self.output_length
def __init__(self, args):
    super(BaselineRegressForce, self).__init__(args)
    self.environment_layer = EnvWHumanCpFiniteDiffFast
    self.loss_function = args.loss
    self.relu = nn.LeakyReLU()
    self.number_of_cp = args.number_of_cp
    self.environment = args.instance_environment
    self.sequence_length = args.sequence_length
    self.gpu_ids = args.gpu_ids

    self.feature_extractor = resnet18(pretrained=args.pretrain)
    del self.feature_extractor.fc
    self.feature_extractor.eval()

    self.image_feature_size = 512
    self.object_feature_size = 512
    self.hidden_size = 512
    self.num_layers = 3
    self.input_feature_size = self.object_feature_size
    self.cp_feature_size = self.number_of_cp * 3

    self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)

    input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
    self.input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)

    contact_point_embed_size = torch.Tensor([3 * 5, 100, self.object_feature_size])
    self.contact_point_embed = input_embedding_net(
        contact_point_embed_size.long().tolist(), dropout=args.dropout_ratio)

    self.lstm_encoder = nn.LSTM(input_size=64 * 7 * 7 + 512,
                                hidden_size=self.hidden_size,
                                batch_first=True,
                                num_layers=self.num_layers)

    force_decoder_size = torch.Tensor(
        [self.hidden_size * 2, 100, 3 * self.number_of_cp])
    self.force_decoder = input_embedding_net(
        force_decoder_size.long().tolist(), dropout=args.dropout_ratio)

    # Training uses real (unbroken) batches; evaluation runs one sequence at a time.
    assert (args.mode == 'train' and args.batch_size > 1 and args.break_batch == 1) \
        or (args.mode != 'train' and args.batch_size == 1)
    self.train_mode = (args.mode == 'train')
    assert self.number_of_cp == 5  # five fingers

    self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
    if args.gpu_ids != -1:
        for obj, val in self.all_objects_keypoint_tensor.items():
            self.all_objects_keypoint_tensor[obj] = val.cuda()

    if not self.train_mode:
        # At test time, additionally report object keypoint, rotation, and
        # position metrics.
        BaselineRegressForce.metric += [
            metrics.ObjKeypointMetric,
            metrics.ObjRotationMetric,
            metrics.ObjPositionMetric,
        ]
def __init__(self, args):
    super(ActionReprModel, self).__init__(args)
    self.image_size = args.image_size
    self.imus = args.imus
    self.input_length = args.input_length
    self.output_length = args.output_length
    self.sequence_length = args.sequence_length
    self.num_classes = args.num_classes
    self.gpu_ids = args.gpu_ids
    self.base_lr = args.base_lr
    self.image_feature = args.image_feature
    self.hidden_size = args.hidden_size
    self.imu_embedding_size = 30
    self.loss_function = args.loss
    self.relu = nn.LeakyReLU()
    self.num_imus = args.num_imus

    self.feature_extractor = FeatureLearnerModule(args)
    self.embedding_input = nn.Linear(args.image_feature, args.hidden_size)
    self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)

    self.number_of_layers = 3
    self.lstm = nn.LSTM(64 * 7 * 7, self.hidden_size,
                        batch_first=True, num_layers=self.number_of_layers)

    action_unembed_size = torch.Tensor([self.hidden_size, 200, self.num_classes])
    self.action_unembed = input_embedding_net(
        action_unembed_size.long().tolist(), dropout=args.dropout)

    assert self.input_length == self.sequence_length and self.output_length == 1
def __init__(self, args):
    super(MoCoIMUModel, self).__init__(args)
    self.image_size = args.image_size
    self.imus = args.imus
    self.input_length = args.input_length
    self.output_length = args.output_length
    self.sequence_length = args.sequence_length
    self.num_classes = args.num_classes
    self.gpu_ids = args.gpu_ids
    self.base_lr = args.base_lr
    self.image_feature = args.image_feature
    self.hidden_size = args.hidden_size
    self.imu_embedding_size = 30
    self.loss_function = args.loss
    self.relu = nn.LeakyReLU()
    self.num_imus = args.num_imus

    # Query encoder and momentum (key) encoder for MoCo.
    self.feature_extractor = FeatureLearnerModule(args)
    self.moco_feature_extractor = FeatureLearnerModule(args)
    self.feature_linear = nn.Linear(512, 128)
    self.moco_feature_linear = nn.Linear(512, 128)
    # With momentum 0.0 these calls copy the query weights into the momentum encoder.
    moment_update(self.feature_extractor, self.moco_feature_extractor, 0.0)
    moment_update(self.feature_linear, self.moco_feature_linear, 0.0)
    self.alpha = 0.999  # momentum coefficient for subsequent updates
    queue_size = 16384

    assert self.input_length == self.sequence_length == self.output_length

    imu_unembed_size = torch.Tensor([self.hidden_size, 100, self.num_imus * 1])
    self.imu_unembed = input_embedding_net(imu_unembed_size.long().tolist(),
                                           dropout=args.dropout)

    self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)
    self.imu_embed_lstm = nn.LSTM(64 * 7 * 7, self.hidden_size,
                                  batch_first=True, num_layers=3)
    self.imu_decoder_lstm = nn.LSTM(64 * 7 * 7, self.hidden_size,
                                    batch_first=True, num_layers=3)

    self.contrast = MemoryMoCo(128, queue_size, 0.07)
    if args.gpu_ids != -1:
        self.contrast = self.contrast.cuda()
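# NOTE: moment_update is imported from elsewhere in this repo; it is called
# above with momentum 0.0 to initialize the key encoder, and self.alpha = 0.999
# suggests standard MoCo EMA updates afterwards. A minimal EMA sketch under
# that assumption (hypothetical name, not the repo's implementation):
def _moment_update_sketch(model, model_ema, momentum):
    # p_ema <- momentum * p_ema + (1 - momentum) * p
    # momentum == 0.0 therefore copies model into model_ema outright.
    # (Sketch ignores buffers such as BatchNorm running statistics.)
    for p, p_ema in zip(model.parameters(), model_ema.parameters()):
        p_ema.data.mul_(momentum).add_(p.data, alpha=1.0 - momentum)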
def __init__(self, args):
    super(NeuralForceSimulator, self).__init__()
    self.clean_force = False

    # Neural force simulator.
    self.only_first_img_feature = True
    self.vis_grad = args.vis_grad
    self.train_res = args.train_res or self.vis_grad
    self.hidden_size = 512
    self.image_feature_dim = 512
    self.num_layers = 3
    self.sequence_length = args.sequence_length
    self.object_feature_size = 512
    self.environment = args.instance_environment
    self.number_of_cp = args.number_of_cp

    self.feature_extractor = resnet18(pretrained=args.pretrain)
    del self.feature_extractor.fc
    if not self.train_res:
        self.feature_extractor.eval()

    self.use_lstm = args.lstm
    self.norm_position = False
    if self.use_lstm:
        self.one_ns_layer = NSLSTM(hidden_size=self.hidden_size,
                                   layer_norm=False,
                                   image_feature_dim=self.image_feature_dim,
                                   norm_position=self.norm_position)
    else:
        self.one_ns_layer = NSWithImageFeature(
            hidden_size=self.hidden_size,
            layer_norm=False,
            image_feature_dim=self.image_feature_dim,
            norm_position=self.norm_position)

    self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)

    input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
    self.input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)

    self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                hidden_size=self.hidden_size,
                                batch_first=True,
                                num_layers=self.num_layers)
    # self.ns_layer = {obj_name: MLPNS(hidden_size=64, layer_norm=False)
    #                  for obj_name in self.all_obj_names}

    assert self.number_of_cp == 5  # five fingers

    self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
    if args.gpu_ids != -1:
        for obj, val in self.all_objects_keypoint_tensor.items():
            self.all_objects_keypoint_tensor[obj] = val.cuda()

    # Weights for the neural-simulator, physics, and ground-truth terms,
    # normalized to sum to 1.
    self.ns_ratio, self.phy_ratio, self.gt_ratio = 1, 1, 0
    total_r = self.ns_ratio + self.phy_ratio + self.gt_ratio
    self.ns_ratio /= total_r
    self.phy_ratio /= total_r
    self.gt_ratio /= total_r
def __init__(self, args):
    super(VindModel, self).__init__(args)
    assert args.detach_level == 0
    self.image_size = args.image_size
    self.imus = args.imus
    self.input_length = args.input_length
    self.output_length = args.output_length
    self.sequence_length = args.sequence_length
    self.num_classes = args.num_classes
    self.gpu_ids = args.gpu_ids
    self.base_lr = args.base_lr
    self.image_feature = args.image_feature
    self.hidden_size = args.hidden_size
    self.imu_embedding_size = 30
    self.loss_function = args.loss
    self.relu = nn.LeakyReLU()
    self.num_imus = args.num_imus

    self.feature_extractor = FeatureLearnerModule(args)
    self.embedding_input = nn.Linear(args.image_feature, args.hidden_size)
    self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)

    self.conv1 = nn.Conv2d(1, 64, 7, stride=2, padding=3)
    self.bn1 = nn.BatchNorm2d(64)
    self.maxpool1 = nn.MaxPool2d(3, stride=2, padding=1)
    self.conv2 = nn.Conv2d(64, 256, 7, stride=2, padding=3)
    self.bn2 = nn.BatchNorm2d(256)
    self.maxpool2 = nn.MaxPool2d(3, stride=2, padding=1)
    self.conv3 = nn.Conv2d(256, 64, 7, stride=2, padding=3)
    self.bn3 = nn.BatchNorm2d(64)

    vind_unembed_size = torch.Tensor(
        [64 * 7 * 7 * 2, 64 * 7 * 7, self.hidden_size, self.num_classes])
    self.vind_unembed = input_embedding_net(vind_unembed_size.long().tolist(),
                                            dropout=args.dropout)

    assert self.input_length == self.sequence_length == self.output_length == 1
def __init__(self, args):
    super(SceneClassModel, self).__init__(args)
    assert args.detach_level == 0
    self.image_size = args.image_size
    self.imus = args.imus
    self.input_length = args.input_length
    self.output_length = args.output_length
    self.sequence_length = args.sequence_length
    self.num_classes = args.num_classes
    self.gpu_ids = args.gpu_ids
    self.base_lr = args.base_lr
    self.image_feature = args.image_feature
    self.hidden_size = args.hidden_size
    self.imu_embedding_size = 30
    self.loss_function = args.loss
    self.relu = nn.LeakyReLU()
    self.num_imus = args.num_imus

    self.feature_extractor = FeatureLearnerModule(args)
    self.embedding_input = nn.Linear(args.image_feature, args.hidden_size)
    self.input_feature_type = args.input_feature_type[0]
    self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)

    scene_unembed_size = torch.Tensor(
        [64 * 7 * 7, self.hidden_size, self.num_classes])
    self.scene_unembed = input_embedding_net(scene_unembed_size.long().tolist(),
                                             dropout=args.dropout)

    assert self.input_length == self.sequence_length == self.output_length == 1
def __init__(self, args):
    super(BatchCPHeatmapModel, self).__init__(args)
    self.use_syn = args.use_syn
    self.ori_w, self.ori_h = 1920, 1080
    self.environment_layer = BatchCPGradientLayer
    self.loss_function = args.loss
    self.relu = nn.LeakyReLU()
    self.number_of_cp = args.number_of_cp
    self.environment = args.instance_environment
    self.sequence_length = args.sequence_length
    self.gpu_ids = args.gpu_ids

    self.feature_extractor = resnet18(pretrained=args.pretrain)
    del self.feature_extractor.fc
    self.feature_extractor.eval()

    self.image_feature_size = 512
    self.object_feature_size = 512
    self.hidden_size = 512
    self.num_layers = 3
    self.input_feature_size = self.object_feature_size
    self.cp_feature_size = self.number_of_cp * 3

    # Twice the feature planes (1024 vs. 512) when synthetic inputs are used.
    plane_dim = 1024 if self.use_syn else 512
    self.image_embed = combine_block_w_do(plane_dim, 64, args.dropout_ratio)
    self.contact_point_image_embed = combine_block_w_do(plane_dim, 64,
                                                        args.dropout_ratio)

    input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
    self.input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)
    self.contact_point_input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)

    state_embed_size = torch.Tensor(
        [EnvState.total_size + self.cp_feature_size, 100, self.object_feature_size])
    self.state_embed = input_embedding_net(
        state_embed_size.long().tolist(), dropout=args.dropout_ratio)

    self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                hidden_size=self.hidden_size,
                                batch_first=True,
                                num_layers=self.num_layers)
    self.contact_point_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                         hidden_size=self.hidden_size,
                                         batch_first=True,
                                         num_layers=self.num_layers)

    contact_point_decoder_size = torch.Tensor(
        [self.hidden_size, 100, 3 * self.number_of_cp])
    self.contact_point_decoder = input_embedding_net(
        contact_point_decoder_size.long().tolist(), dropout=args.dropout_ratio)

    self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                    hidden_size=self.hidden_size)

    forces_directions_decoder_size = torch.Tensor(
        [self.hidden_size, 100, 3 * self.number_of_cp])
    self.forces_directions_decoder = input_embedding_net(
        forces_directions_decoder_size.long().tolist(), dropout=args.dropout_ratio)

    assert args.batch_size == 1, ('we do not use a batch size larger than 1; '
                                  'instead we accumulate gradients over several steps')
    assert self.number_of_cp == 5  # five fingers

    self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
    if args.gpu_ids != -1:
        for obj, val in self.all_objects_keypoint_tensor.items():
            self.all_objects_keypoint_tensor[obj] = val.cuda()
def __init__(self, args):
    super(ForcePredictor, self).__init__()
    self.image_feature_size = 512
    self.object_feature_size = 512
    self.hidden_size = 512
    self.num_layers = 3
    self.sequence_length = args.sequence_length
    self.number_of_cp = args.number_of_cp
    self.use_gt_cp = args.use_gt_cp
    self.vis_grad = args.vis_grad
    self.train_res = args.train_res or self.vis_grad
    self.grad_value = None

    # Force predictor networks.
    self.feature_extractor = resnet18(pretrained=args.pretrain)
    del self.feature_extractor.fc
    if not self.train_res:
        self.feature_extractor.eval()

    self.input_feature_size = self.object_feature_size
    self.cp_feature_size = self.number_of_cp * 3

    self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
    self.contact_point_image_embed = combine_block_w_do(512, 64,
                                                        args.dropout_ratio)

    input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
    self.input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)
    self.contact_point_input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)

    state_embed_size = torch.Tensor(
        [NoGradEnvState.total_size + self.cp_feature_size, 100,
         self.object_feature_size])
    self.state_embed = input_embedding_net(
        state_embed_size.long().tolist(), dropout=args.dropout_ratio)

    self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                hidden_size=self.hidden_size,
                                batch_first=True,
                                num_layers=self.num_layers)
    self.contact_point_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                         hidden_size=self.hidden_size,
                                         batch_first=True,
                                         num_layers=self.num_layers)

    contact_point_decoder_size = torch.Tensor(
        [self.hidden_size, 100, 3 * self.number_of_cp])
    self.contact_point_decoder = input_embedding_net(
        contact_point_decoder_size.long().tolist(), dropout=args.dropout_ratio)

    self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                    hidden_size=self.hidden_size)

    forces_directions_decoder_size = torch.Tensor(
        [self.hidden_size, 100, 3 * self.number_of_cp])
    self.forces_directions_decoder = input_embedding_net(
        forces_directions_decoder_size.long().tolist(), dropout=args.dropout_ratio)

    assert args.batch_size == 1, 'batch sizes > 1 are not implemented yet because of the environment'
    assert self.number_of_cp == 5  # five fingers

    self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
    if args.gpu_ids != -1:
        for obj, val in self.all_objects_keypoint_tensor.items():
            self.all_objects_keypoint_tensor[obj] = val.cuda()
def __init__(self, args):
    super(JointNS, self).__init__(args)
    self.image_feature_size = 512
    self.object_feature_size = 512
    self.hidden_size = 512
    self.num_layers = 3
    self.loss_function = args.loss
    self.number_of_cp = args.number_of_cp
    self.environment = args.instance_environment
    self.sequence_length = args.sequence_length
    self.gpu_ids = args.gpu_ids
    self.all_obj_names = args.object_list
    self.use_gt_cp = args.use_gt_cp
    self.clean_force = True

    # Configuration w.r.t. the two losses.
    self.joint_two_losses = args.joint_two_losses
    self.loss1_or_loss2 = None
    if args.loss1_w < 0.00001:
        self.loss1_or_loss2 = False  # update loss2 only
    elif args.loss2_w < 0.00001:
        self.loss1_or_loss2 = True  # update loss1 only
    self.loss1_optim, self.loss2_optim, self.joint_optim = None, None, None

    # Neural force simulator.
    self.use_image_feature = True
    if not self.use_image_feature:
        self.one_ns_layer = MLPNS(hidden_size=64, layer_norm=False)
    else:
        self.one_ns_layer = NSWithImageFeature(hidden_size=64,
                                               layer_norm=False,
                                               image_feature_dim=512)
    # self.ns_layer = {obj_name: MLPNS(hidden_size=64, layer_norm=False)
    #                  for obj_name in self.all_obj_names}

    # Force predictor networks.
    self.feature_extractor = resnet18(pretrained=args.pretrain)
    del self.feature_extractor.fc
    self.feature_extractor.eval()

    self.input_feature_size = self.object_feature_size
    self.cp_feature_size = self.number_of_cp * 3

    self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
    self.contact_point_image_embed = combine_block_w_do(512, 64,
                                                        args.dropout_ratio)

    input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
    self.input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)
    self.contact_point_input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)

    state_embed_size = torch.Tensor(
        [NoGradEnvState.total_size + self.cp_feature_size, 100,
         self.object_feature_size])
    self.state_embed = input_embedding_net(
        state_embed_size.long().tolist(), dropout=args.dropout_ratio)

    self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                hidden_size=self.hidden_size,
                                batch_first=True,
                                num_layers=self.num_layers)
    self.contact_point_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                         hidden_size=self.hidden_size,
                                         batch_first=True,
                                         num_layers=self.num_layers)

    contact_point_decoder_size = torch.Tensor(
        [self.hidden_size, 100, 3 * self.number_of_cp])
    self.contact_point_decoder = input_embedding_net(
        contact_point_decoder_size.long().tolist(), dropout=args.dropout_ratio)

    self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                    hidden_size=self.hidden_size)

    forces_directions_decoder_size = torch.Tensor(
        [self.hidden_size, 100, 3 * self.number_of_cp])
    self.forces_directions_decoder = input_embedding_net(
        forces_directions_decoder_size.long().tolist(), dropout=args.dropout_ratio)

    assert args.batch_size == 1, 'batch sizes > 1 are not implemented yet because of the environment'
    assert self.number_of_cp == 5  # five fingers

    self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
    if args.gpu_ids != -1:
        for obj, val in self.all_objects_keypoint_tensor.items():
            self.all_objects_keypoint_tensor[obj] = val.cuda()

    self.force_predictor_modules = [
        self.feature_extractor, self.image_embed, self.contact_point_image_embed,
        self.input_object_embed, self.contact_point_input_object_embed,
        self.state_embed, self.lstm_encoder, self.contact_point_encoder,
        self.contact_point_decoder, self.forces_directions_decoder
    ]

    # See gradients for debugging.
    self.vis_grad = args.vis_grad
    self.grad_vis = None
    self.train_res = args.train_res or self.vis_grad
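# The loss1/loss2/joint optimizers above are initialized to None, which
# suggests they are built later over the two parameter groups. A minimal
# sketch of one way to do that (hypothetical helper; the optimizer choice,
# learning rate, and which group pairs with which loss are all assumptions):
def _build_joint_ns_optims_sketch(model, lr=1e-4):
    # Parameters of the force-predictor modules vs. the neural-simulator layer.
    fp_params = [p for m in model.force_predictor_modules for p in m.parameters()]
    ns_params = list(model.one_ns_layer.parameters())
    loss1_optim = torch.optim.Adam(ns_params, lr=lr)
    loss2_optim = torch.optim.Adam(fp_params, lr=lr)
    joint_optim = torch.optim.Adam(fp_params + ns_params, lr=lr)
    return loss1_optim, loss2_optim, joint_optim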
def __init__(self, args):
    super(ComplexAEGazeImuModel, self).__init__(args)
    self.image_size = args.image_size
    self.imus = args.imus
    self.input_length = args.input_length
    self.output_length = args.output_length
    self.sequence_length = args.sequence_length
    self.num_classes = args.num_classes
    self.gpu_ids = args.gpu_ids
    self.base_lr = args.base_lr
    self.image_feature = args.image_feature
    self.hidden_size = args.hidden_size
    self.imu_embedding_size = 30
    self.loss_function = args.loss
    self.relu = nn.LeakyReLU()
    self.num_imus = args.num_imus

    self.feature_extractor = FeatureLearnerModule(args)
    # Keeping dropout at 0 here is very important.
    self.pointwise_conv = combine_block_w_do(512, 64, dropout=0)
    self.imu_pointwise_conv = combine_block_w_do(512, 64, args.dropout)
    self.gaze_pointwise_conv = combine_block_w_do(512, 64, args.dropout)

    self.reconst_resolution = args.reconst_resolution
    assert self.reconst_resolution == 56
    self.feature_sizes = [7, 14, 28, 56, 56, self.reconst_resolution]
    self.upscale_factor = [
        int(self.feature_sizes[i + 1] / self.feature_sizes[i])
        for i in range(len(self.feature_sizes) - 1)
    ]
    self.up1 = upshuffle(64, 256, self.upscale_factor[0],
                         kernel_size=3, stride=1, padding=1)
    self.up2 = upshuffle(256, 128, self.upscale_factor[1],
                         kernel_size=3, stride=1, padding=1)
    self.up3 = upshuffle(128, 64, self.upscale_factor[2],
                         kernel_size=3, stride=1, padding=1)
    self.up4 = upshuffle(64, 64, self.upscale_factor[3],
                         kernel_size=3, stride=1, padding=1)
    self.up5 = upshufflenorelu(64, 3, self.upscale_factor[4])

    gaze_unembed_size = torch.Tensor([self.hidden_size, 100, 2])
    self.gaze_unembed = input_embedding_net(gaze_unembed_size.long().tolist(),
                                            dropout=args.dropout)

    imu_unembed_size = torch.Tensor([self.hidden_size, 100, self.num_imus * 1])
    self.imu_unembed = input_embedding_net(imu_unembed_size.long().tolist(),
                                           dropout=args.dropout)

    self.imu_embed_lstm = nn.LSTM(64 * 7 * 7, self.hidden_size,
                                  batch_first=True, num_layers=3)
    self.gaze_embed_lstm = nn.LSTM(64 * 7 * 7, self.hidden_size,
                                   batch_first=True, num_layers=3)
    self.gaze_decoder_lstm = nn.LSTM(64 * 7 * 7, self.hidden_size,
                                     batch_first=True, num_layers=3)
    self.imu_decoder_lstm = nn.LSTM(64 * 7 * 7, self.hidden_size,
                                    batch_first=True, num_layers=3)

    assert self.input_length == self.sequence_length == self.output_length
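# Usage sketch: all of the constructors above consume an argparse-style args
# object. A minimal illustration with hypothetical values (the field values
# are assumptions; the real fields and defaults come from the repo's argument
# parser, and the base classes may require more fields than shown):
#
#     from types import SimpleNamespace
#     args = SimpleNamespace(
#         image_size=224, imus=[0, 1, 2, 3], input_length=5, output_length=5,
#         sequence_length=5, num_classes=10, gpu_ids=-1, base_lr=1e-4,
#         image_feature=512, hidden_size=512, loss='mse', num_imus=4,
#         dropout=0.5, reconst_resolution=56)
#     model = ComplexAEGazeImuModel(args)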