def get_model(args, get_video_encoder_only=True, logger=None):
    """Build the audio-video model, load checkpoint weights, and return it in eval mode.

    Args:
        args: Namespace providing vid_base_arch, aud_base_arch, pretrained,
            num_clusters, use_mlp, headcount, weights_path and pool_op;
            args.ckpt_epoch is written here when a checkpoint is loaded.
        get_video_encoder_only: If True, return only the video backbone
            (stem + layer1..layer4 + pool + Flatten) for video-only retrieval.
        logger: Optional logger; status messages fall back to print when None.

    Returns:
        The model in eval mode, wrapped in DataParallel when CUDA is available.

    Raises:
        ValueError: If args.pool_op is neither 'max' nor 'avg'.
    """
    # Route status messages through the provided logger when available
    # (the original accepted `logger` but never used it).
    log = logger.info if logger is not None else print

    # Load model
    model = load_model(
        vid_base_arch=args.vid_base_arch,
        aud_base_arch=args.aud_base_arch,
        pretrained=args.pretrained,
        num_classes=args.num_clusters,
        norm_feat=False,
        use_mlp=args.use_mlp,
        headcount=args.headcount,
    )

    # Load model weights
    start = time.time()
    if isinstance(args.weights_path, str):
        # Some configs encode "no checkpoint" as the literal string 'None'.
        weight_path_not_none = args.weights_path != 'None'
    else:
        weight_path_not_none = args.weights_path is not None
    if weight_path_not_none:
        log("Loading model weights")
        if os.path.exists(args.weights_path):
            # map_location='cpu' keeps loading robust on CPU-only hosts;
            # the model is moved to GPU below when CUDA is available.
            ckpt_dict = torch.load(args.weights_path, map_location='cpu')
            model_weights = ckpt_dict["model"]
            args.ckpt_epoch = ckpt_dict['epoch']
            print(f"Epoch checkpoint: {args.ckpt_epoch}", flush=True)
            utils.load_model_parameters(model, model_weights)
    log(f"Time to load model weights: {time.time() - start}")

    # Put model in eval mode (inference only: freezes dropout/batch-norm).
    model.eval()

    # Get video encoder for video-only retrieval
    if get_video_encoder_only:
        model = model.video_network.base
        if args.pool_op == 'max':
            pool = torch.nn.MaxPool3d((2, 2, 2), stride=(2, 2, 2))
        elif args.pool_op == 'avg':
            pool = torch.nn.AvgPool3d((2, 2, 2), stride=(2, 2, 2))
        else:
            # The original used `assert("…")`, which asserts a non-empty
            # string and can never fire, leaving `pool` undefined below.
            raise ValueError("Only 'max' and 'avg' pool operations allowed")

        # Set up model
        model = torch.nn.Sequential(
            model.stem,
            model.layer1,
            model.layer2,
            model.layer3,
            model.layer4,
            pool,
            Flatten(),
        )

    if torch.cuda.is_available():
        model = model.cuda()
        model = torch.nn.DataParallel(model)
    return model
示例#2
0
def make_basic_cnn(nb_filters=64, nb_classes=10,
                   input_shape=(None, 28, 28, 1)):
  """Assemble a small conv net: three conv+ReLU stages followed by a
  flattened linear softmax classifier, wrapped in an MLP container."""
  conv_specs = [
      (nb_filters, (8, 8), (2, 2), "SAME"),
      (nb_filters * 2, (6, 6), (2, 2), "VALID"),
      (nb_filters * 2, (5, 5), (1, 1), "VALID"),
  ]
  layers = []
  for n_filters, kernel, strides, padding in conv_specs:
    layers.append(Conv2D(n_filters, kernel, strides, padding))
    layers.append(ReLU())
  layers += [Flatten(), Linear(nb_classes), Softmax()]

  return MLP(nb_classes, layers, input_shape)
示例#3
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 hidden_size=512,
                 base_kwargs=None):
        """Intrinsic Curiosity Module: a conv feature encoder plus forward
        and inverse dynamics heads, trained jointly with one Adam optimizer.

        Args:
            obs_shape: Observation shape, C x H x W (channels first).
            action_space: Discrete action space; only ``.n`` is read here.
            hidden_size: Size of the feature embedding produced by ``phi``.
            base_kwargs: Accepted but unused in this constructor.
        """
        super(ICM, self).__init__()
        self.obs_shape = obs_shape  # C x H x W
        self.action_space = action_space

        num_inputs = self.obs_shape[0]
        num_outputs = self.action_space.n

        # Orthogonal weight init with ReLU gain; biases zeroed.
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        # Feature encoder: f(obs) = hidden_size vector.
        # NOTE(review): the 32 * 7 * 7 flatten size presumes a fixed input
        # resolution (Atari-style 84x84 would fit) — confirm against callers.
        self.phi = nn.Sequential(init_(nn.Conv2d(num_inputs, 32, 8, stride=4)),
                                 nn.ReLU(),
                                 init_(nn.Conv2d(32, 64, 4, stride=2)),
                                 nn.ReLU(),
                                 init_(nn.Conv2d(64, 32, 3, stride=1)),
                                 nn.ReLU(), Flatten(),
                                 init_(nn.Linear(32 * 7 * 7, hidden_size)),
                                 nn.ReLU())

        # Forward dynamics: f(phi_obs, action) = hidden_size
        # (input is the feature vector concatenated with an action encoding
        # of width num_outputs, judging by the layer's input dimension).
        num_inputs_forward_dynamic = hidden_size + num_outputs
        self.forward_dynamic = nn.Sequential(
            nn.Linear(num_inputs_forward_dynamic, 256), nn.ReLU(),
            nn.Linear(256, hidden_size))

        # Inverse dynamics: f(phi_obs, phi_obs_next) = num_outputs logits.
        num_inputs_inverse_dynamic = hidden_size + hidden_size
        self.inverse_dynamic = nn.Sequential(
            nn.Linear(num_inputs_inverse_dynamic, 256), nn.ReLU(),
            nn.Linear(256, num_outputs))

        # One optimizer over all three sub-networks.
        model_params = list(self.phi.parameters()) + list(
            self.forward_dynamic.parameters()) + list(
                self.inverse_dynamic.parameters())
        # reduction='none' keeps the forward loss element-wise (unreduced).
        self.fwd_loss_func = nn.MSELoss(reduction='none')
        self.inv_loss_func = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(model_params, lr=1e-3)

        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
示例#4
0
def make_madry_ngpu(nb_classes=10, input_shape=(None, 28, 28, 1), **kwargs):
  """
  Create a multi-GPU model similar to Madry et al. (arXiv:1706.06083).
  """
  def conv_stage(n_filters):
    # One conv / ReLU / max-pool stage of the Madry MNIST architecture.
    return [Conv2DnGPU(n_filters, (5, 5), (1, 1), "SAME"),
            ReLU(),
            MaxPool((2, 2), (2, 2), "SAME")]

  layers = conv_stage(32) + conv_stage(64)
  layers += [Flatten(),
             LinearnGPU(1024),
             ReLU(),
             LinearnGPU(nb_classes),
             Softmax()]

  return MLPnGPU(nb_classes, layers, input_shape)
    def __init__(self, batch_size):
        """CNN with three conv/max-pool stages, three fully-connected
        layers, and an EDropout layer applied between them.

        Args:
            batch_size: Stored on the instance for use outside __init__.
        """
        super(CNN_2_EDropout, self).__init__()
        # Stage 1: 3 -> 96 channels, then 3x3 stride-2 max-pool.
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=96,
                              kernel_size=5, padding=1, stride=1)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, padding=0, stride=2)
        # Stage 2: 96 -> 128 channels.
        self.cnn2 = nn.Conv2d(in_channels=96, out_channels=128,
                              kernel_size=5, padding=2, stride=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, padding=0, stride=2)
        # Stage 3: 128 -> 256 channels.
        self.cnn3 = nn.Conv2d(in_channels=128, out_channels=256,
                              kernel_size=5, padding=2, stride=1)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, padding=0, stride=2)

        self.flatten = Flatten()

        # Classifier head: 1024 -> 2048 -> 2048 -> 10 classes.
        # NOTE(review): fc1 expects exactly 1024 flattened features, which
        # ties this model to a specific input resolution — confirm in forward.
        self.fc1 = nn.Linear(in_features=1024, out_features=2048)
        self.fc2 = nn.Linear(in_features=2048, out_features=2048)
        self.fc3 = nn.Linear(in_features=2048, out_features=10)

        # Energy-based dropout; `train` is frozen to self.training at
        # construction time rather than tracked dynamically.
        self.ed = EDropout(p=0.5, train=self.training, inplace=True)

        self.batch_size = batch_size
示例#6
0
#         nn.Dropout2d(p=0.25),
#         Flatten(),
#         nn.Linear(channel_3 * 64 * 64//4, 2)
#     )
# Channel widths for the four conv stages (progressively narrowing).
channel_0 = 32
channel_1 = 16
channel_2 = 8
channel_3 = 4
# Binary classifier: four conv blocks with dropout and two 2x2 max-pools.
# The final Linear's input (channel_3 * 64 * 64 // 16) implies 64x64 inputs,
# matching the Resize([64, 64]) used in predict_image below.
model = nn.Sequential(nn.Conv2d(3, channel_0, (7, 7), padding=3), nn.ReLU(),
                      nn.Dropout2d(p=0.25),
                      nn.Conv2d(channel_0, channel_1, (5, 5), padding=2),
                      nn.ReLU(), nn.MaxPool2d((2, 2)),
                      nn.Conv2d(channel_1, channel_2, (3, 3), padding=1),
                      nn.ReLU(), nn.Dropout2d(p=0.25),
                      nn.Conv2d(channel_2, channel_3, (3, 3), padding=1),
                      nn.ReLU(), nn.MaxPool2d((2, 2)), Flatten(),
                      nn.Linear(channel_3 * 64 * 64 // 16, 2))
# Load pretrained weights; strict=False tolerates missing/unexpected keys.
# NOTE(review): `load` and `dvc` (map_location device) are defined elsewhere
# in the file — presumably torch.load and a torch.device; confirm.
model.load_state_dict(load("model_bis0.92.pth", map_location=dvc),
                      strict=False)


def predict_image(image):
    to_pil = ToPILImage()
    image = to_pil(image).convert('RGB')
    test_transforms = Compose([
        Resize([64, 64]),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    image_tensor = test_transforms(image).float()
    image_tensor = image_tensor.unsqueeze_(0)