def extract_image_feats(video_path):
    """Extract ResNet-152 features from 80 evenly spaced frames of a video.

    The video is dumped to JPEG frames with ffmpeg into an ``info`` directory
    located under the first ``/``-separated component of ``video_path``. The
    extracted frames are removed again before the function returns, also
    when feature extraction fails.

    Args:
        video_path: Path of the video file handed to ffmpeg.

    Returns:
        A numpy array holding one feature row per sampled frame (80 rows).

    Raises:
        RuntimeError: If ffmpeg produced no frames for ``video_path``.
    """
    print('extracting image features...')
    model = resnet152(pretrained='imagenet')
    model = model.cuda()
    # model = nn.DataParallel(model)
    # Swap the classifier for an identity so the forward pass returns the
    # features that would have been fed to it.
    model.last_linear = utils.Identity()
    model.eval()
    C, H, W = 3, 224, 224
    load_image_fn = utils.LoadTransformImage(model)

    dst = os.path.join(video_path.split('/')[0], 'info')
    # ffmpeg does not create the output directory on its own.
    os.makedirs(dst, exist_ok=True)

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters from being re-interpreted.
    command = [
        'ffmpeg', '-i', video_path,
        '-vf', 'scale=400:300',
        '-qscale:v', '2',
        '{0}/%06d.jpg'.format(dst),
    ]
    with open(os.devnull, "w") as ffmpeg_log:
        subprocess.call(command, stdout=ffmpeg_log, stderr=ffmpeg_log)

    try:
        image_list = sorted(glob.glob(os.path.join(dst, '*.jpg')))
        if not image_list:
            raise RuntimeError(
                'ffmpeg produced no frames for %r' % (video_path,))

        # 80 indices spread evenly over the extracted frames; short videos
        # simply repeat frames.
        samples = np.round(np.linspace(0, len(image_list) - 1, 80))
        image_list = [image_list[int(sample)] for sample in samples]

        images = torch.zeros((len(image_list), C, H, W))
        for i, image_path in enumerate(image_list):
            images[i] = load_image_fn(image_path)

        with torch.no_grad():
            image_feats = model(images.cuda())
        image_feats = image_feats.cpu().numpy()
    finally:
        # Always drop the temporary frames, even if extraction failed.
        for file in os.listdir(dst):
            if file.endswith('.jpg'):
                os.remove(os.path.join(dst, file))

    return image_feats
Пример #2
0
def extract_image_feats(video_path):
    """Extract ResNet-152 features from 80 evenly spaced frames of a video.

    Progress is reported through the module-level ``hasilPred`` object via
    its ``configure(text=...)`` method. Frames are dumped with ffmpeg into an
    ``info`` directory located under the first backslash-separated component
    of ``video_path``; that directory is wiped and recreated on every call
    and the extracted frames are removed again before returning, also when
    feature extraction fails.

    Args:
        video_path: Path of the video file handed to ffmpeg.

    Returns:
        A numpy array holding one feature row per sampled frame (80 rows).

    Raises:
        RuntimeError: If ffmpeg produced no frames for ``video_path``.
    """
    hasilPred.configure(text="Membuat Prediksi....")
    model = resnet152(pretrained='imagenet')
    model = model.cuda()
    # Swap the classifier for an identity so the forward pass returns the
    # features that would have been fed to it.
    model.last_linear = utils.Identity()
    model.eval()
    C, H, W = 3, 224, 224
    load_image_fn = utils.LoadTransformImage(model)

    dst = os.path.join(video_path.split('\\')[0], 'info')
    if os.path.exists(dst):
        print(" Menghapus Direktori: " + dst + "\\")
        shutil.rmtree(dst)
    os.makedirs(dst)

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters from being re-interpreted.
    command = [
        'ffmpeg', '-i', video_path,
        '-vf', 'scale=400:300',
        '-qscale:v', '2',
        '{0}/%06d.jpg'.format(dst),
    ]
    with open(os.devnull, "w") as ffmpeg_log:
        subprocess.call(command, stdout=ffmpeg_log, stderr=ffmpeg_log)

    try:
        list_image = sorted(glob.glob(os.path.join(dst, '*.jpg')))
        if not list_image:
            raise RuntimeError(
                'ffmpeg produced no frames for %r' % (video_path,))

        # 80 indices spread evenly over the extracted frames; short videos
        # simply repeat frames.
        samples = np.round(np.linspace(0, len(list_image) - 1, 80))
        list_image = [list_image[int(sample)] for sample in samples]

        images = torch.zeros((len(list_image), C, H, W))
        for i, image_path in enumerate(list_image):
            images[i] = load_image_fn(image_path)

        with torch.no_grad():
            image_feats = model(images.cuda())
        image_feats = image_feats.cpu().numpy()
    finally:
        # Always drop the temporary frames, even if extraction failed.
        for file in os.listdir(dst):
            if file.endswith('.jpg'):
                os.remove(os.path.join(dst, file))

    return image_feats
Пример #3
0
def generate_C2D_model(opt):
    """Create a pretrained 2D CNN feature extractor selected by ``opt``.

    Args:
        opt: Options object providing ``c2d_model_name`` (one of
            ``'inception_v3'``, ``'resnet152'``, ``'inception_v4'``,
            ``'inceptionresnetv2'``), ``no_cuda`` and ``device``.

    Returns:
        A tuple ``(load_image_fn, model, (C, H, W))``: the image loader built
        for the model, the model with its ``last_linear`` replaced by an
        identity, and the input shape associated with the model.

    Raises:
        ValueError: If ``opt.c2d_model_name`` is not a supported model.
    """
    # model name -> (pretrainedmodels factory name, square input side)
    supported = {
        'inception_v3': ('inceptionv3', 299),
        'resnet152': ('resnet152', 224),
        'inception_v4': ('inceptionv4', 299),
        'inceptionresnetv2': ('inceptionresnetv2', 299),
    }
    name = opt.c2d_model_name
    if name not in supported:
        # Previously this fell through and crashed on an unbound ``model``.
        raise ValueError(
            "unsupported c2d_model_name %r; expected one of %s"
            % (name, sorted(supported)))

    factory_name, side = supported[name]
    C, H, W = 3, side, side
    model = getattr(pretrainedmodels, factory_name)(num_classes=1000,
                                                    pretrained='imagenet')
    load_image_fn = utils.LoadTransformImage(model)

    # Return the inputs of the classifier instead of class logits.
    model.last_linear = utils.Identity()

    if not opt.no_cuda:
        model = model.to(opt.device)

    return load_image_fn, model, (C, H, W)
Пример #4
0
def extract_feats(args):
    """Build the CNN named by ``args['model']`` and run feature extraction.

    The model's ``last_linear`` is replaced by an identity, the model is
    wrapped in ``nn.DataParallel``, moved to CUDA and handed, together with
    its image loader, to ``prepro_feats.extract_feats``.

    Args:
        args: Mapping of options; ``args['model']`` must be one of
            ``'inception_v3'``, ``'resnet152'`` or ``'inception_v4'``. The
            whole mapping is forwarded to ``prepro_feats.extract_feats``.

    Raises:
        ValueError: If ``args['model']`` is not a supported model.
    """
    params = args
    # model name -> (pretrainedmodels factory name, extra factory kwargs)
    supported = {
        'inception_v3': ('inceptionv3', {}),
        'resnet152': ('resnet152', {}),
        'inception_v4': ('inceptionv4', {'num_classes': 1000}),
    }
    if params['model'] not in supported:
        # Previously this only printed and then crashed on an unbound
        # ``model``.
        raise ValueError("doesn't support %s" % (params['model']))

    factory_name, extra_kwargs = supported[params['model']]
    model = getattr(pretrainedmodels, factory_name)(pretrained='imagenet',
                                                    **extra_kwargs)
    load_image_fn = utils.LoadTransformImage(model)

    # Return the inputs of the classifier instead of class logits.
    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)
    model = model.cuda()
    prepro_feats.extract_feats(params, model, load_image_fn)
Пример #5
0
def fix_frame_extract(frame_path, feats_path, frames_num, model, video_name):
    """Extract CNN features from a fixed number of frames of one video.

    ``frames_num`` evenly spaced frames are picked from the ``*.jpg`` files
    in ``frame_path``, pushed through the selected network (with its
    ``last_linear`` replaced by an identity) on the module-level ``device``,
    and the result is saved as ``<feats_path>/<video_name>.npy``.

    Args:
        frame_path: Directory object exposing ``glob`` (e.g. a
            ``pathlib.Path``) that contains the extracted frames.
        feats_path: Directory in which the ``.npy`` file is written.
        frames_num: Number of frames to sample.
        model: Model name: ``'resnet152'``, ``'vgg16'`` or ``'inception_v4'``.
        video_name: Base name of the output file.

    Raises:
        ValueError: If ``model`` is not supported or no frames are found.
    """
    # model name -> (pretrainedmodels factory name, square input side)
    supported = {
        'resnet152': ('resnet152', 224),
        'vgg16': ('vgg16', 224),
        'inception_v4': ('inceptionv4', 299),
    }
    if model not in supported:
        # Previously the name string itself was used as the network and
        # failed later with an AttributeError.
        raise ValueError(
            "unsupported model %r; expected one of %s"
            % (model, sorted(supported)))

    # load model
    factory_name, side = supported[model]
    C, H, W = 3, side, side
    net = getattr(pretrainedmodels, factory_name)(pretrained='imagenet')
    net.last_linear = utils.Identity()
    net = net.to(device)
    net.eval()
    load_image_fn = utils.LoadTransformImage(net)

    # load data
    img_list = sorted(frame_path.glob('*.jpg'))
    if not img_list:
        raise ValueError('no .jpg frames found in %s' % (frame_path,))
    # get index
    samples_ix = np.linspace(0, len(img_list) - 1, frames_num).astype(int)
    img_list = [img_list[i] for i in samples_ix]
    # build tensor
    imgs = torch.zeros([len(img_list), C, H, W])
    for i, img_path in enumerate(img_list):
        imgs[i] = load_image_fn(img_path)
    imgs = imgs.to(device)
    with torch.no_grad():
        feats = net(imgs)
    feats = feats.cpu().numpy()
    # save
    np.save(os.path.join(feats_path, video_name + ".npy"), feats)
Пример #6
0
def build_model(model_name):
    """Rebuild a fine-tuned 7-way classifier and load its saved weights.

    A pretrainedmodels backbone is wrapped so that all of its children except
    the last one form ``features`` and a new ``Linear -> Dropout -> Linear``
    stack forms ``classifier``. The weights are then read from
    ``<parent of this file's directory>/pretrained_model/128_7/<model_name>.pth``
    and every parameter is frozen.

    Args:
        model_name: One of ``'senet154'``, ``'se_resnet152'``,
            ``'se_resnext101_32x4d'``, ``'resnet152'``, ``'resnet101'`` or
            ``'densenet201'``.

    Returns:
        A tuple ``(model, num_ftrs)`` where ``num_ftrs`` is the output width
        of the last classifier layer.

    Raises:
        ValueError: If ``model_name`` is not a supported backbone.
    """
    # Every supported name is also the name of its pretrainedmodels factory.
    supported = ('senet154', 'se_resnet152', 'se_resnext101_32x4d',
                 'resnet152', 'resnet101', 'densenet201')
    if model_name not in supported:
        # Previously this crashed later on an unbound ``model``.
        raise ValueError(
            "unsupported model_name %r; expected one of %s"
            % (model_name, list(supported)))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load pretrained model
    backbone = getattr(pretrainedmodels, model_name)(pretrained='imagenet')
    for param in backbone.parameters():
        param.requires_grad = False

    backbone_out_features = backbone.last_linear.in_features

    class CustomModel(nn.Module):
        def __init__(self, model):
            super(CustomModel, self).__init__()
            self.features = nn.Sequential(*list(model.children())[:-1])
            self.classifier = nn.Sequential(
                torch.nn.Linear(backbone_out_features, 128),
                torch.nn.Dropout(0.3),  # drop 30% of the activations
                torch.nn.Linear(128, 7))

        def forward(self, x):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            return x

    model = CustomModel(backbone)
    freeze_layer(model.features)
    num_ftrs = list(model.classifier.children())[-1].out_features

    model.to(device)
    model.name = model_name
    PATH = os.path.abspath(os.path.dirname(__file__))

    PATH_par = os.path.abspath(os.path.join(PATH, os.pardir))
    path_to_model = os.path.join(PATH_par, 'pretrained_model', '128_7')

    # map_location lets a checkpoint saved on a GPU load on a CPU-only host,
    # which the device fallback above explicitly allows for.
    model.load_state_dict(
        torch.load(os.path.join(path_to_model, '%s.pth' % (model_name)),
                   map_location=device))
    for param in model.parameters():
        param.requires_grad = False

    return model, num_ftrs
Пример #7
0
def get_resnet152():
    """Return a pretrained ResNet-152 adapted to 4-channel input, 28 outputs.

    The first convolution is replaced by a 4-input-channel one whose weights
    are the pretrained kernels plus one extra input channel set to their mean
    over the input-channel axis. The average pool becomes a 16x16 max pool
    (stride 2) and the classifier becomes dropout followed by a 28-way linear
    layer.
    """
    net = resnet152(pretrained=True)
    pretrained_kernels = net.conv1.weight

    net.conv1 = nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2),
                          padding=(3, 3), bias=False)
    # Extra input channel starts from the mean of the pretrained ones.
    mean_kernel = torch.mean(pretrained_kernels, dim=1).unsqueeze(1)
    widened = torch.cat((pretrained_kernels, mean_kernel), dim=1)
    net.conv1.weight = torch.nn.Parameter(widened)

    pool = nn.MaxPool2d(kernel_size=16, stride=2, padding=0)
    net.avgpool = nn.Sequential(pool)

    classifier_in = net.fc.in_features
    net.fc = nn.Sequential(nn.Dropout(), nn.Linear(classifier_in, 28))

    return net
def generate_2D_model(opt):
    """Create a pretrained 2D CNN feature extractor on CUDA.

    The network's ``last_linear`` is replaced by an identity and the network
    is wrapped in ``nn.DataParallel`` before being moved to CUDA.

    Args:
        opt: Mapping whose ``'model'`` entry is one of ``'inception_v3'``,
            ``'vgg16'``, ``'vgg19'``, ``'resnet50'``, ``'resnet101'``,
            ``'resnet152'``, ``'inception_v4'`` or ``'nasnet'``.

    Returns:
        The wrapped model.

    Raises:
        ValueError: If ``opt['model']`` is not a supported model.
    """
    imagenet = {'pretrained': 'imagenet'}
    # model name -> (pretrainedmodels factory name, factory kwargs)
    supported = {
        'inception_v3': ('inceptionv3', imagenet),
        'vgg16': ('vgg16', imagenet),
        'vgg19': ('vgg19', imagenet),
        'resnet50': ('resnet50', imagenet),
        'resnet101': ('resnet101', imagenet),
        'resnet152': ('resnet152', imagenet),
        'inception_v4': ('inceptionv4',
                         {'num_classes': 1000, 'pretrained': 'imagenet'}),
        'nasnet': ('nasnetalarge',
                   {'num_classes': 1001,
                    'pretrained': 'imagenet+background'}),
    }
    if opt['model'] not in supported:
        # Previously this only printed and then crashed on an unbound
        # ``model``.
        raise ValueError("doesn't support %s" % (opt['model']))

    factory_name, factory_kwargs = supported[opt['model']]
    model = getattr(pretrainedmodels, factory_name)(**factory_kwargs)

    # Return the inputs of the classifier instead of class logits.
    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)
    # if opt['saved_model'] != '':
    #     model.load_state_dict(torch.load(opt['saved_model']), strict=False)
    model = model.cuda()
    return model
Пример #9
0
def resnet152(input_size=(3, 224, 224), num_classes=1000, pretrained=None):
    """Build a torchvision ResNet-152 with a size-adaptive pooling/classifier.

    The torchvision ``fc`` and ``avgpool`` layers are removed. A probe tensor
    is pushed through ``model.features`` (expected to be supplied by
    ``add_instances_to_torchvisionmodel`` -- confirm) to find the spatial
    size of the final feature map, which becomes the average-pool kernel, and
    a new ``last_linear`` layer is sized from the pooled feature width.

    Args:
        input_size: ``(channels, height, width)`` of the expected input. When
            it differs from ``(3, 224, 224)`` the first convolution is
            replaced by a freshly initialised one with ``channels`` inputs.
        num_classes: Output width of ``last_linear``.
        pretrained: Forwarded to ``models.resnet152``.

    Returns:
        The model with ``logits`` and ``forward`` rebound to the
        implementations defined here.
    """
    model = models.resnet152(pretrained=pretrained)
    model = add_instances_to_torchvisionmodel(model)
    # Change the First Convol2D layer into new input shape
    if input_size != (3, 224, 224):
        model.conv1 = nn.Conv2d(input_size[0],
                                64,
                                kernel_size=(7, 7),
                                stride=(2, 2),
                                padding=(3, 3),
                                bias=False)
        model.input_size = input_size

    del model.fc
    del model.avgpool

    # calc kernel_size on new_avgpool2d layer
    # The probe must run in eval mode and without autograd: in training mode
    # it would overwrite the BatchNorm running statistics of the (possibly
    # pretrained) network with statistics of random noise.
    was_training = model.training
    model.eval()
    with torch.no_grad():
        test_tensor = torch.randn(
            (1, input_size[0], input_size[1], input_size[2]))
        features = model.features(test_tensor)
        avg_pool2d_kernel_size = (features.shape[2], features.shape[3])

        # calc last linear size
        pooled = F.avg_pool2d(features, kernel_size=avg_pool2d_kernel_size)
        last_linear_in = pooled.view(pooled.size(0), -1).shape[1]
    model.train(was_training)

    model.last_linear = nn.Linear(in_features=last_linear_in,
                                  out_features=num_classes)

    def logits(self, features):
        x = F.relu(features, inplace=False)
        x = F.avg_pool2d(x, kernel_size=avg_pool2d_kernel_size, stride=1)
        x = x.view(x.size(0), -1)
        x = self.last_linear(x)
        return x

    def forward(self, input):
        x = self.features(input)
        x = self.logits(x)
        return x

    # Bind as instance methods so they override the class implementations
    # for this model object only.
    model.logits = types.MethodType(logits, model)
    model.forward = types.MethodType(forward, model)
    return model
Пример #10
0
def extract_feats(frame_path, feats_path, interval, model, video_name):
    """
    extract feature from frames of one video

    Every ``interval``-th ``*.jpg`` frame in ``frame_path`` is pushed through
    the selected network (with its ``last_linear`` replaced by an identity)
    on the module-level ``device``; the result is saved as
    ``<feats_path>/<video_name>.npy``.

    :param video_name: base name of the output ``.npy`` file
    :param model: name of model (``'resnet152'``, ``'vgg16'`` or
        ``'inception_v4'``)
    :param frame_path: path of frames; must expose ``glob`` (e.g. a
        ``pathlib.Path``)
    :param feats_path: path to store results
    :param interval: step between sampled frame indices; it is passed to
        ``np.arange`` as the step, so it must be numeric (not a string)
    :return: None
    :raises ValueError: if ``model`` is not a supported model name
    """
    # model name -> (pretrainedmodels factory name, square input side)
    supported = {
        'resnet152': ('resnet152', 224),
        'vgg16': ('vgg16', 224),
        'inception_v4': ('inceptionv4', 299),
    }
    if model not in supported:
        # Previously the name string itself was used as the network and
        # failed later with an AttributeError.
        raise ValueError(
            "unsupported model %r; expected one of %s"
            % (model, sorted(supported)))

    # load model
    factory_name, side = supported[model]
    C, H, W = 3, side, side
    net = getattr(pretrainedmodels, factory_name)(pretrained='imagenet')
    net.last_linear = utils.Identity()
    net = net.to(device)
    net.eval()
    load_image_fn = utils.LoadTransformImage(net)

    # load data
    img_list = sorted(frame_path.glob('*.jpg'))
    # get index
    samples_ix = np.arange(0, len(img_list), interval)
    img_list = [img_list[int(i)] for i in samples_ix]
    # build tensor
    imgs = torch.zeros([len(img_list), C, H, W])
    for i, img_path in enumerate(img_list):
        imgs[i] = load_image_fn(img_path)
    imgs = imgs.to(device)
    with torch.no_grad():
        feats = net(imgs)
    feats = feats.cpu().numpy()
    # save
    np.save(os.path.join(feats_path, video_name + ".npy"), feats)
Пример #11
0
def main(args):
    """Train a ``MILModel`` on top of a pretrained CNN.

    Sets the module-level ``C``, ``H`` and ``W`` to the input shape of the
    chosen backbone, builds the model, data loader, Adam optimizer, step
    learning-rate scheduler and multi-label loss, makes sure the checkpoint
    directory exists and calls ``train``.

    Args:
        args: Namespace providing ``model`` (``'inception_v3'``,
            ``'resnet152'`` or ``'inception_v4'``), ``coco_labels`` (path of
            a JSON file containing ``num_classes``), ``batch_size``,
            ``learning_rate``, ``weight_decay``,
            ``learning_rate_decay_every``, ``learning_rate_decay_rate`` and
            ``checkpoint_path``.

    Raises:
        ValueError: If ``args.model`` is not a supported model.
    """
    global C, H, W
    # model name -> (pretrainedmodels factory name, extra kwargs, input side)
    supported = {
        'inception_v3': ('inceptionv3', {}, 299),
        'resnet152': ('resnet152', {}, 224),
        'inception_v4': ('inceptionv4', {'num_classes': 1000}, 299),
    }
    if args.model not in supported:
        # The old message used ``args['model']``, which fails on a namespace,
        # and execution then continued with ``model`` unbound.
        raise ValueError("doesn't support %s" % (args.model))

    # Context manager so the labels file is closed deterministically.
    with open(args.coco_labels) as labels_file:
        coco_labels = json.load(labels_file)
    num_classes = coco_labels['num_classes']

    factory_name, extra_kwargs, side = supported[args.model]
    C, H, W = 3, side, side
    model = getattr(pretrainedmodels, factory_name)(pretrained='imagenet',
                                                    **extra_kwargs)

    load_image_fn = utils.LoadTransformImage(model)
    dim_feats = model.last_linear.in_features
    model = MILModel(model, dim_feats, num_classes)
    model = model.cuda()
    dataset = CocoDataset(coco_labels)
    dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=args.learning_rate_decay_every,
        gamma=args.learning_rate_decay_rate)

    crit = nn.MultiLabelSoftMarginLoss()
    # Also creates missing parent directories and tolerates a concurrent
    # creation of the same directory.
    os.makedirs(args.checkpoint_path, exist_ok=True)
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, load_image_fn,
          args)
Пример #12
0
def build_model():
    """Rebuild the fine-tuned ResNet-152 7-way classifier from disk.

    All children of the pretrainedmodels ResNet-152 except the last one form
    ``features``; a new ``Linear -> Dropout -> Linear`` stack forms
    ``classifier``. The weights are loaded from ``resnet152.pth`` in the
    current working directory.

    Returns:
        A tuple ``(model, num_ftrs)`` where ``num_ftrs`` is the output width
        of the last classifier layer.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load pretrained model
    model_name = 'resnet152'
    backbone = pretrainedmodels.resnet152(pretrained='imagenet')
    for param in backbone.parameters():
        param.requires_grad = False

    backbone_out_features = backbone.last_linear.in_features

    class CustomModel(nn.Module):
        def __init__(self, model):
            super(CustomModel, self).__init__()
            self.features = nn.Sequential(*list(model.children())[:-1])
            self.classifier = nn.Sequential(
                torch.nn.Linear(backbone_out_features, 128),
                torch.nn.Dropout(0.3),  # drop 30% of the activations
                torch.nn.Linear(128, 7))

        def forward(self, x):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            return x

    model = CustomModel(backbone)
    freeze_layer(model.features)
    num_ftrs = list(model.classifier.children())[-1].out_features

    # map_location lets a checkpoint saved on a GPU load on a CPU-only host,
    # which the device fallback above explicitly allows for.
    model.load_state_dict(torch.load('resnet152.pth', map_location=device))
    model.to(device)
    model.name = model_name
    return model, num_ftrs
        params['feat_path'], params['feat_name'] +
        ('' if '.hdf5' in params['feat_name'] else '.hdf5'))
    params['logit_dir'] = os.path.join(
        params['feat_path'], params['logit_name'] +
        ('' if '.hdf5' in params['logit_name'] else '.hdf5'))

    print('Model: %s' % params['model'])
    print('The extracted features will be saved to --> %s' %
          params['feat_dir'])

    if params['model'] == 'resnet101':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet101(pretrained='imagenet')
    elif params['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
    elif params['model'] == 'resnet18':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet18(pretrained='imagenet')
    elif params['model'] == 'resnet34':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet34(pretrained='imagenet')
    elif params['model'] == 'inceptionresnetv2':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionresnetv2(
            num_classes=1001, pretrained='imagenet+background')
    elif params['model'] == 'googlenet':
        C, H, W = 3, 224, 224
        model = googlenet(pretrained=True)
        print(model)
    else:
Пример #14
0
    def __init__(self,
                 backbone,
                 heads,
                 head_conv=128,
                 num_filters=[256, 256, 256],
                 pretrained=True,
                 dcn=False,
                 gn=False,
                 ws=False,
                 freeze_bn=False,
                 after_non_local='layer1',
                 non_local_hidden_channels=None):
        """Build the backbone, lateral/decoder convolutions and output heads.

        Args:
            backbone: Name selecting the feature extractor.
            heads: Mapping from head name to its number of output channels;
                each head is registered as an attribute of that name.
            head_conv: Hidden channel count of every head.
            num_filters: Three channel counts used by the lateral and decode
                stages. The default list is only read, never mutated.
            pretrained: Whether pretrained backbone weights are requested
                (ignored by ``'resnet18_ctdet'``, which loads a fixed file).
            dcn: Use ``DCN`` instead of ``Conv2d`` in ``decode3``.
            gn: Use ``GroupNorm`` with 32 groups instead of ``BatchNorm2d``.
            ws: Forwarded to the project ``Conv2d`` layers.
            freeze_bn: Disable gradients of the backbone's BatchNorm weight
                and bias.
            after_non_local: Name of the backbone attribute whose first
                block's ``conv1.in_channels`` sizes the ``NonLocal2d`` block;
                ``None`` skips creating it.
            non_local_hidden_channels: Hidden width of the non-local block;
                defaults to half of its input channels.

        Raises:
            NotImplementedError: If ``backbone`` is not a known name.
        """
        super().__init__()

        self.heads = heads

        # Backbones whose pretrainedmodels factory shares the backbone name,
        # mapped to the channel count of their last stage.
        pretrainedmodels_backbones = {
            'resnet18': 512,
            'resnet34': 512,
            'resnet50': 2048,
            'resnet101': 2048,
            'resnet152': 2048,
            'se_resnext50_32x4d': 2048,
            'se_resnext101_32x4d': 2048,
        }
        # backbone name -> (timm model name, last-stage channel count)
        timm_backbones = {
            'resnet34_v1b': ('gluon_resnet34_v1b', 512),
            'resnet50_v1d': ('gluon_resnet50_v1d', 2048),
            'resnet101_v1d': ('gluon_resnet101_v1d', 2048),
            'resnext50_32x4d': ('resnext50_32x4d', 2048),
            'resnext50d_32x4d': ('resnext50d_32x4d', 2048),
            'seresnext26_32x4d': ('seresnext26_32x4d', 2048),
        }

        if backbone in pretrainedmodels_backbones:
            # pretrainedmodels expects a weight-set name rather than a bool.
            weights = 'imagenet' if pretrained else None
            factory = getattr(pretrainedmodels, backbone)
            self.backbone = factory(pretrained=weights)
            num_bottleneck_filters = pretrainedmodels_backbones[backbone]
        elif backbone in timm_backbones:
            timm_name, num_bottleneck_filters = timm_backbones[backbone]
            self.backbone = timm.create_model(timm_name,
                                              pretrained=pretrained)
            convert_to_inplace_relu(self.backbone)
        elif backbone == 'resnet18_ctdet':
            self.backbone = models.resnet18()
            state_dict = torch.load(
                'pretrained_weights/ctdet_coco_resdcn18.pth')['state_dict']
            self.backbone.load_state_dict(state_dict, strict=False)
            num_bottleneck_filters = 512
        elif backbone == 'resnet50_maskrcnn':
            self.backbone = models.detection.maskrcnn_resnet50_fpn(
                pretrained=pretrained).backbone.body
            print(self.backbone)
            num_bottleneck_filters = 2048
        else:
            raise NotImplementedError(
                'unsupported backbone: %r' % (backbone,))

        if after_non_local is not None:
            self.after_non_local = after_non_local
            in_channels = getattr(self.backbone,
                                  after_non_local)[0].conv1.in_channels
            if non_local_hidden_channels is None:
                non_local_hidden_channels = in_channels // 2
            self.non_local = NonLocal2d(in_channels, non_local_hidden_channels)

        if freeze_bn:
            for m in self.backbone.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.weight.requires_grad = False
                    m.bias.requires_grad = False

        def norm(channels):
            # Single place that picks the normalisation layer. The previous
            # code passed the whole ``num_filters`` list to GroupNorm for
            # lateral4 and lateral1, which failed whenever ``gn`` was set.
            if gn:
                return nn.GroupNorm(32, channels)
            return nn.BatchNorm2d(channels)

        def conv_norm_relu(in_channels, out_channels, **conv_kwargs):
            # Project Conv2d (bias-free) -> norm -> in-place ReLU.
            return nn.Sequential(
                Conv2d(in_channels,
                       out_channels,
                       bias=False,
                       ws=ws,
                       **conv_kwargs),
                norm(out_channels),
                nn.ReLU(inplace=True))

        # 1x1 lateral projections of backbone stages of decreasing width.
        self.lateral4 = conv_norm_relu(num_bottleneck_filters,
                                       num_filters[0],
                                       kernel_size=1)
        self.lateral3 = conv_norm_relu(num_bottleneck_filters // 2,
                                       num_filters[0],
                                       kernel_size=1)
        self.lateral2 = conv_norm_relu(num_bottleneck_filters // 4,
                                       num_filters[1],
                                       kernel_size=1)
        self.lateral1 = conv_norm_relu(num_bottleneck_filters // 8,
                                       num_filters[2],
                                       kernel_size=1)

        if dcn:
            decode3_conv = DCN(num_filters[0], num_filters[1],
                               kernel_size=3, padding=1, stride=1)
        else:
            decode3_conv = Conv2d(num_filters[0], num_filters[1],
                                  kernel_size=3, padding=1, bias=False, ws=ws)
        self.decode3 = nn.Sequential(
            decode3_conv,
            norm(num_filters[1]),
            nn.ReLU(inplace=True))
        self.decode2 = conv_norm_relu(num_filters[1],
                                      num_filters[2],
                                      kernel_size=3,
                                      padding=1)
        self.decode1 = conv_norm_relu(num_filters[2],
                                      num_filters[2],
                                      kernel_size=3,
                                      padding=1)

        for head in sorted(self.heads):
            num_output = self.heads[head]
            fc = nn.Sequential(
                Conv2d(num_filters[2],
                       head_conv,
                       kernel_size=3,
                       padding=1,
                       bias=False,
                       ws=ws),
                norm(head_conv),
                nn.ReLU(inplace=True),
                nn.Conv2d(head_conv, num_output, kernel_size=1))
            if 'hm' in head:
                # Heads with 'hm' in their name get a constant negative bias
                # on the output layer; all others use fill_fc_weights.
                fc[-1].bias.data.fill_(-2.19)
            else:
                fill_fc_weights(fc)
            setattr(self, head, fc)
 def __init__(self):
     """Set up the extractor around a pretrainedmodels ResNet-152.

     The network is created with the factory's default arguments.
     """
     super().__init__()
     # Presumably the width of the features this extractor yields -- confirm.
     self.FEAT_SIZE = 2048
     self.model = pretrainedmodels.resnet152()
Пример #16
0
def build_model(model_name):
    """Build a frozen backbone followed by a single 128-unit linear layer.

    A pretrainedmodels backbone is first wrapped with a
    ``Linear -> Dropout -> Linear`` classifier; a second wrapper then keeps
    the feature part and only the first classifier layer. No checkpoint is
    loaded in this function, so that linear layer keeps its fresh
    initialisation.

    Args:
        model_name: One of ``'senet154'``, ``'se_resnet152'``,
            ``'se_resnext101_32x4d'``, ``'resnet152'``, ``'resnet101'`` or
            ``'densenet201'``.

    Returns:
        A tuple ``(model, num_ftrs)`` where ``num_ftrs`` is the output width
        of the retained classifier layer.

    Raises:
        ValueError: If ``model_name`` is not a supported backbone.
    """
    # Every supported name is also the name of its pretrainedmodels factory.
    supported = ('senet154', 'se_resnet152', 'se_resnext101_32x4d',
                 'resnet152', 'resnet101', 'densenet201')
    if model_name not in supported:
        # Previously this crashed later on an unbound ``model``.
        raise ValueError(
            "unsupported model_name %r; expected one of %s"
            % (model_name, list(supported)))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load pretrained model
    backbone = getattr(pretrainedmodels, model_name)(pretrained='imagenet')
    backbone.to(device)
    for param in backbone.parameters():
        param.requires_grad = False

    backbone_out_features = backbone.last_linear.in_features

    class CustomModel(nn.Module):
        def __init__(self, model):
            super(CustomModel, self).__init__()
            self.features = nn.Sequential(*list(model.children())[:-1])
            self.classifier = nn.Sequential(
                torch.nn.Linear(backbone_out_features, 128),
                torch.nn.Dropout(0.3),  # drop 30% of the activations
                torch.nn.Linear(128, 7)
            )

        def forward(self, x):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            return x

    full_model = CustomModel(backbone)
    freeze_layer(full_model.features)
    full_model.to(device)
    for param in full_model.parameters():
        param.requires_grad = False

    class CustomModel1(nn.Module):
        def __init__(self, model):
            super(CustomModel1, self).__init__()
            self.features = nn.Sequential(*list(model.children())[:-1])
            # Keep only the first classifier layer (index 0).
            self.classifier = nn.Sequential(
                *[list(model.classifier.children())[i] for i in [0]]
            )

        def forward(self, x):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            return x

    # Use a distinct name instead of rebinding the CustomModel class name to
    # an instance.
    truncated_model = CustomModel1(full_model)
    num_ftrs = list(truncated_model.classifier.children())[-1].out_features
    truncated_model.to(device)
    return truncated_model, num_ftrs
Пример #17
0
    def __init__(self,
                 config_file: Optional[str] = None,
                 override_list: Optional[List[Any]] = None):
        """Populate the default configuration and apply overrides.

        Defaults are set on a fresh ``CN`` node, then merged with
        ``config_file`` (when given) and ``override_list``, after which
        ``add_derived_params`` is called and the node is frozen.

        Args:
            config_file: Optional path passed to ``merge_from_file``.
            override_list: Optional list passed to ``merge_from_list``;
                ``None`` (the default) means no overrides.
        """
        # A ``None`` default avoids sharing one mutable list object across
        # every call of this constructor.
        if override_list is None:
            override_list = []

        _C = CN()
        _C.VALID_IMAGES = [
            'CXR1576_IM-0375-2001.png', 'CXR1581_IM-0378-2001.png',
            'CXR3177_IM-1497-2001.png', 'CXR2585_IM-1082-1001.png',
            'CXR1125_IM-0082-1001.png', 'CXR3_IM-1384-2001.png',
            'CXR1565_IM-0368-1001.png', 'CXR1105_IM-0072-1001-0001.png',
            'CXR2874_IM-1280-1001.png', 'CXR1886_IM-0574-1001.png'
        ]

        # Each entry is a (network, int, int) tuple; the two integers look
        # like the feature width and the input resolution -- confirm.
        # NOTE: all five networks are instantiated eagerly right here.
        _C.MODELS = [{
            'resnet18': (pretrainedmodels.resnet18(pretrained=None), 512, 224),
            'resnet50':
            (pretrainedmodels.resnet50(pretrained=None), 2048, 224),
            'resnet101':
            (pretrainedmodels.resnet101(pretrained=None), 2048, 224),
            'resnet152':
            (pretrainedmodels.resnet152(pretrained=None), 2048, 224),
            'inception_resnet_v2':
            (pretrainedmodels.inceptionresnetv2(pretrained=None), 1536, 299)
        }]

        # _C.MODELS_FEATURE_SIZE = {'resnet18':512, 'resnet50':2048, 'resnet101':2048, 'resnet152':2048,
        #                           'inception_v3':2048, 'inception_resnet_v2':1536}

        # Random seed for NumPy and PyTorch, important for reproducibility.
        _C.RANDOM_SEED = 42
        # Opt level for mixed precision training using NVIDIA Apex. This can be
        # one of {0, 1, 2}. Refer NVIDIA Apex docs for their meaning.
        _C.FP16_OPT = 2

        # Dataset locations. The defaults below are absolute paths; override
        # them through ``config_file`` / ``override_list`` on other systems.
        _C.IMAGE_PATH = '/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/Images_2'
        _C.TRAIN_JSON_PATH = '/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/iu_xray_train_2.json'
        _C.VAL_JSON_PATH = '/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/iu_xray_val_2.json'
        _C.TEST_JSON_PATH = '/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/iu_xray_test_2.json'
        _C.PRETRAINED_EMDEDDING = False
        # Path to .vocab file generated by ``sentencepiece``.
        _C.VOCAB_FILE_PATH = "/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/Vocab/indiana.vocab"
        # Path to .model file generated by ``sentencepiece``.
        _C.VOCAB_MODEL_PATH = "/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/Vocab/indiana.model"
        _C.VOCAB_SIZE = 3000
        _C.EPOCHS = 1024
        _C.BATCH_SIZE = 10
        _C.TEST_BATCH_SIZE = 100
        _C.ITERATIONS_PER_EPOCHS = 1
        _C.WEIGHT_DECAY = 1e-5
        _C.NUM_LABELS = 41
        _C.IMAGE_SIZE = 299
        _C.MAX_SEQUENCE_LENGTH = 130
        _C.DROPOUT_RATE = 0.1
        _C.D_HEAD = 64

        _C.TRAIN_DATASET_LENGTH = 25000
        _C.INFERENCE_TIME = False
        _C.COMBINED_N_LAYERS = 1
        _C.BEAM_SIZE = 50
        _C.PADDING_INDEX = 0
        _C.EOS_INDEX = 3
        _C.SOS_INDEX = 2
        _C.USE_BEAM_SEARCH = True
        _C.EXTRACTED_FEATURES = False
        _C.IMAGE_MODEL_PATH = '/netscratch/gsingh/MIMIC_CXR/Results/Image_Feature_Extraction/MIMIC_CXR_No_ES/model.pth'

        _C.EMBEDDING_DIM = 8192
        _C.CONTEXT_SIZE = 1024
        _C.LR_COMBINED = 1e-4
        _C.MAX_LR = 1e-1
        _C.SAVED_DATASET = False
        _C.MODEL_NAME = 'inception_resnet_v2'
        # Common prefix of every output location derived below.
        INIT_PATH = '/netscratch/gsingh/MIMIC_CXR/Results/Modified_Transformer/Indiana_15_10_2020_2/'
        _C.SAVED_DATASET_PATH_TRAIN = INIT_PATH + 'DataSet/train_dataloader.pth'
        _C.SAVED_DATASET_PATH_VAL = INIT_PATH + 'DataSet/val_dataloader.pth'
        _C.SAVED_DATASET_PATH_TEST = INIT_PATH + 'DataSet/test_dataloader.pth'

        _C.CHECKPOINT_PATH = INIT_PATH + 'CheckPoints'
        _C.MODEL_PATH = INIT_PATH + 'combined_model.pth'
        _C.MODEL_STATE_DIC = INIT_PATH + 'combined_model_state_dic.pth'
        _C.FIGURE_PATH = INIT_PATH + 'Graphs'
        _C.CSV_PATH = INIT_PATH
        _C.TEST_CSV_PATH = INIT_PATH + 'test_output_image_feature_input.json'
        self._C = _C
        if config_file is not None:
            self._C.merge_from_file(config_file)
        self._C.merge_from_list(override_list)

        self.add_derived_params()

        # Make an instantiated object of this class immutable.
        self._C.freeze()
Пример #18
0
# Configured architecture name -> name of the factory function on ``PM``.
# Every entry is built with a 1000-class head that is then replaced.
_PM_FACTORY_NAMES = {
    "Inceptionv3": "inceptionv3",
    "Xception": "xception",
    "VGG_19": "vgg19",
    "Resnet18": "resnet18",
    "Resnet50": "resnet50",
    "Resnet101": "resnet101",
    "Resnet152": "resnet152",
    "Resnet34": "resnet34",
    "Densenet121": "densenet121",
    "ResNeXt101-32": "resnext101_32x4d",
    "ResNeXt101-64": "resnext101_64x4d",
}


def Model_builder(configer):
    """Build the network named in the configuration and move it to ``device``.

    Args:
        configer: Configuration object. The function reads
            ``configer.model`` (keys ``'name'``, ``'pretrained'``,
            ``'DataParallel'``, ``'Multi_GPU_replica'``),
            ``configer.dataset_cfg["id_cfg"]["num_classes"]`` and
            ``configer.train_cfg["gpu"]``.

    Returns:
        The model, wrapped in ``DataParallel`` or
        ``DistributedDataParallel`` when the configuration asks for it,
        otherwise the bare model on the module-level ``device``.

    Raises:
        ImportError: If the configured model name is not supported. (The
            exception type is kept for compatibility with existing callers.)
    """
    model_name = configer.model['name']
    No_classes = configer.dataset_cfg["id_cfg"]["num_classes"]
    model_pretrained = configer.model['pretrained']
    model_dataparallel = configer.model["DataParallel"]
    model_gpu_replica = configer.model["Multi_GPU_replica"]
    gpu_ids = configer.train_cfg["gpu"]

    if model_name in _PM_FACTORY_NAMES:
        factory = getattr(PM, _PM_FACTORY_NAMES[model_name])
        model = factory(num_classes=1000, pretrained=model_pretrained)
        # Swap the 1000-way classifier for one sized to this dataset.
        d = model.last_linear.in_features
        model.last_linear = nn.Linear(d, No_classes)

    elif model_name == "MobilenetV2":
        model = MobileNetV2(n_class=No_classes)

    else:
        raise ImportError("Model Architecture not supported")

    # Performing Data Parallelism if configured

    if model_dataparallel:

        model = torch.nn.DataParallel(model.to(device), device_ids=gpu_ids)

    elif model_gpu_replica:

        # Ranks are zero-based and must be < world_size, so the only valid
        # rank for a single-process group is 0.
        torch.distributed.init_process_group(backend='nccl',
                                             world_size=1,
                                             rank=0)
        # DistributedDataParallel is exported from torch.nn.parallel; it is
        # not an attribute of torch.nn itself.
        model = torch.nn.parallel.DistributedDataParallel(
            model.to(device), device_ids=gpu_ids)

    else:
        model = model.to(device)

    print('---------- Model Loaded')

    return model
    args.device = torch.device(
        'cuda:' + str(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    args.output_dir = os.path.join(args.output_dir, args.model)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    params = vars(args)

    if params['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(num_classes=1000,
                                           pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'inceptionresnetv2':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionresnetv2(num_classes=1000,
                                                   pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    else:
Пример #20
0
def model_152(pretrained=True, **kwargs):
    """Return a ResNet-152 from ``pretrainedmodels`` with ImageNet weights.

    NOTE: ``pretrained`` and ``**kwargs`` are accepted but not used; the
    model is always requested with ``num_classes=1000`` and
    ``pretrained='imagenet'``.
    """
    settings = {'num_classes': 1000, 'pretrained': 'imagenet'}
    network = pretrainedmodels.resnet152(**settings)
    return network
Пример #21
0
def main():
    """Command-line entry point: run the audio steps, then image features.

    Parses the command-line options, makes sure the output directory
    exists, calls ``vToA`` and ``split_audio`` with the options, removes the
    ``.wav`` files left directly in the output directory, and finally builds
    the selected pretrained image model and hands it to
    ``extract_image_feats``.

    If ``--model`` names an unsupported network the function prints a
    message and returns without extracting image features.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--video_dir',
        type=str,
        default='../msrvtt_2017/train-video',
        help='The video dir that one would like to extract audio file from')
    parser.add_argument('--output_dir',
                        type=str,
                        default='../msrvtt_2017/preprocessed',
                        help='The file output directory')
    parser.add_argument(
        '--output_channels',
        type=int,
        default=1,
        help='The number of output audio channels, default to 1')
    parser.add_argument(
        '--output_frequency',
        type=int,
        default=16000,
        help='The output audio frequency in Hz, default to 16000')
    parser.add_argument(
        '--band_width',
        type=int,
        default=160,
        help=
        'Bandwidth specified to sample the audio (unit in kbps), default to 160'
    )
    parser.add_argument(
        '--model',
        type=str,
        default='resnet152',
        help=
        'The pretrained model to use for extracting image features, default to resnet152'
    )
    parser.add_argument('--gpu',
                        type=str,
                        default='0',
                        help='The CUDA_VISIBLE_DEVICES argument, default to 0')
    parser.add_argument(
        '--n_frame_steps',
        type=int,
        default=80,
        help='The number of frames to extract from a single video')
    opt = vars(parser.parse_args())

    output_dir = opt['output_dir']
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(output_dir, exist_ok=True)
    vToA(opt)
    split_audio(opt)
    print('cleaning up original .wav files...')
    for entry in os.listdir(output_dir):
        if entry.endswith('.wav'):
            os.remove(os.path.join(output_dir, entry))

    os.environ['CUDA_VISIBLE_DEVICES'] = opt['gpu']

    # Supported --model values -> factory function name on pretrainedmodels.
    factory_names = {
        'resnet152': 'resnet152',
        'inception_v3': 'inceptionv3',
        'vgg16': 'vgg16',
    }
    factory_name = factory_names.get(opt['model'])
    if factory_name is None:
        print('The image model is not supported')
        # Stop here: there is no model to build, and continuing would fail
        # with an UnboundLocalError on ``model`` below.
        return

    model = getattr(pretrainedmodels, factory_name)(pretrained='imagenet')
    load_image_fn = utils.LoadTransformImage(model)

    # Replace the classifier with an identity layer so the forward pass
    # yields the features that feed ``last_linear``.
    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)

    model = model.cuda()
    extract_image_feats(opt, model, load_image_fn)