def get_coco(root, image_set, transforms, mode='instances'):
    # Build a COCO-2017 detection dataset. CocoDetection, ConvertCocoPolysToMask
    # and _coco_remove_images_without_annotations come from torchvision's
    # detection reference utilities.
    anno_file_template = "{}_{}2017.json"
    PATHS = {
        "train": ("train2017",
                  os.path.join("annotations",
                               anno_file_template.format(mode, "train"))),
        "val": ("val2017",
                os.path.join("annotations",
                             anno_file_template.format(mode, "val"))),
        # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
    }

    t = [ConvertCocoPolysToMask()]

    if transforms is not None:
        t.append(transforms)
    transforms = T.Compose(t)

    img_folder, ann_file = PATHS[image_set]
    img_folder = os.path.join(root, img_folder)
    ann_file = os.path.join(root, ann_file)

    dataset = CocoDetection(img_folder, ann_file, transforms=transforms)

    if image_set == "train":
        dataset = _coco_remove_images_without_annotations(dataset)

    # Debugging shortcut (disabled): restrict the dataset to its first 500 images.
    # dataset = torch.utils.data.Subset(dataset, list(range(500)))

    return dataset
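
A minimal usage sketch for get_coco, assuming a standard COCO-2017 layout under a hypothetical ./coco root:

import torch

train_dataset = get_coco("./coco", image_set="train", transforms=None)
loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    collate_fn=lambda batch: tuple(zip(*batch)),  # keep variable-sized detection targets
)
images, targets = next(iter(loader))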
Example #2
def BOT_recognition(model_BOT, img):
    # Predict per-person attributes (staff/customer, standing/sitting,
    # phone use, gender) for the person crop(s) in `img`.
    transform = T.Compose([
        T.Resize((256, 128)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    loader = DataLoader(
        Load_person(img, transform=transform),
        batch_size=1,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        drop_last=False,
    )
    model_BOT.eval()

    # Defaults so the return values are defined even if the loader is empty.
    staff_con = customer_con = stand_con = sit_con = 0
    play_with_phone_con = male_con = female_con = 0

    with torch.no_grad():
        for batch_idx, img2 in enumerate(loader):
            if torch.cuda.is_available():
                img2 = img2.cuda()
            (gender_outputs, staff_outputs, customer_outputs, stand_outputs,
             sit_outputs, phone_outputs) = model_BOT(img2)

            staff_con = float(Confidence(staff_outputs, 1))
            customer_con = float(Confidence(customer_outputs, 1))
            stand_con = float(Confidence(stand_outputs, 1))
            sit_con = float(Confidence(sit_outputs, 1))
            play_with_phone_con = float(Confidence(phone_outputs, 1))
            male_con = float(Confidence(gender_outputs, 0))
            female_con = float(Confidence(gender_outputs, 1))

            # Binarize the confidences (note the lower 0.1 threshold used
            # for the phone attribute).
            staff_con = 1 if staff_con > 0.5 else 0
            customer_con = 1 if customer_con > 0.5 else 0
            stand_con = 1 if stand_con > 0.5 else 0
            sit_con = 1 if sit_con > 0.5 else 0
            play_with_phone_con = 1 if play_with_phone_con > 0.1 else 0
            male_con = 1 if male_con > 0.5 else 0
            female_con = 1 if female_con > 0.5 else 0

    return staff_con, customer_con, stand_con, sit_con, play_with_phone_con, male_con, female_con
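
A hedged usage sketch; model_BOT (the trained attribute model) and person_crop (any input accepted by Load_person) are assumed to exist:

staff, customer, stand, sit, phone, male, female = BOT_recognition(model_BOT, person_crop)
if staff and phone:
    print("staff member using a phone")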
Example #3
def get_transform(args):

    melkwargs = {
        'n_mels': args.nfilt,
        'n_fft': args.nfft,
        'win_length': int(args.winlen * args.signal_sr),
        'hop_length': int(args.winstep * args.signal_sr),
    }
    args.signal_samples = int(args.signal_sr * args.signal_len)

    args.signal_width = int(
        np.ceil((args.signal_len - args.winlen) / args.winstep) + 1)
    if args.features_name.lower() == 'logfbes':
        features = transforms.Compose([
            transforms.LogFBEs(args.signal_sr, args.winlen, args.winstep,
                               args.nfilt, args.nfft, args.preemph),
            transforms.ToTensor(),
        ])
        args.nfeature = args.nfilt
    elif args.features_name.lower() == 'mfccs':
        features = transforms.Compose([
            transforms.MFCCs(args.signal_sr, args.winlen, args.winstep,
                             args.numcep, args.nfilt, args.nfft, args.preemph,
                             args.ceplifter),
            transforms.ToTensor(),
        ])
        args.nfeature = args.numcep
    elif args.features_name.lower() == 'ta.mfccs':
        features = transforms.Compose([
            # no ToTensor needed: torchaudio transforms consume tensors directly
            torchaudio.transforms.MFCC(sample_rate=args.signal_sr,
                                       n_mfcc=args.numcep,
                                       melkwargs=melkwargs),
        ])
        args.nfeature = args.numcep
        args.signal_width = 1 + args.signal_samples // melkwargs['hop_length']
    elif args.features_name.lower() == 'ta.logfbes':
        log_offset = 1e-6
        features = transforms.Compose([
            # no ToTensor needed: torchaudio transforms consume tensors directly
            torchaudio.transforms.MelSpectrogram(sample_rate=args.signal_sr,
                                                 **melkwargs),
            transforms.Lambda(lambda t: torch.log(t + log_offset)),
        ])
        args.nfeature = args.nfilt
        args.signal_width = 1 + args.signal_samples // melkwargs['hop_length']
    else:
        raise ValueError(
            '--features_name should be one of {LogFBEs | MFCCs | ta.MFCCs | ta.LogFBEs}')

    # Composing transforms
    test_transform = transforms.Compose([
        features,
        transforms.Lambda(lambda x: x.unsqueeze(0)),
    ])

    silence_transform = transforms.Compose([
        transforms.Lambda(lambda x: x * random.uniform(0.0, args.silence_vol)),
        test_transform,
    ])

    args.bkg_noise_path = 'background_noise'

    # The commented augmentations are disabled; training currently uses the
    # same pipeline as test.
    train_transform = transforms.Compose([
        # transforms.TimeShifting2(shift_min=args.shift_min, shift_max=args.shift_max),
        # transforms.RandomApplyTransform(p=args.p_transform, transform=transforms.AddNoise3(
        #     os.path.join(args.data_root, args.bkg_noise_path),
        #     args.noise_vol, args.signal_samples, args.signal_sr)),
        test_transform,
        # torchaudio.transforms.TimeMasking(args.mask_time),
        # torchaudio.transforms.TimeMasking(args.mask_time),
        # torchaudio.transforms.FrequencyMasking(args.mask_freq),
    ])

    return {
        'train': train_transform,
        'val': test_transform,
        'test': test_transform
    }, silence_transform
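
A usage sketch with illustrative parameter values only; the field names simply mirror the attributes read above:

from types import SimpleNamespace

args = SimpleNamespace(
    signal_sr=16000, signal_len=1.0, winlen=0.025, winstep=0.010,
    nfilt=40, nfft=512, numcep=13, preemph=0.97, ceplifter=22,
    features_name='ta.MFCCs', silence_vol=0.1, data_root='data',
)
tf, silence_tf = get_transform(args)
# tf['train'] / tf['val'] / tf['test'] map a 1-D waveform tensor to a
# (1, n_mfcc, time) feature tensor; args.nfeature and args.signal_width
# record the resulting feature dimensions.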
Example #4
def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
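
A short sketch pairing this helper with the get_coco loader above (the ./coco root is hypothetical):

train_tf = get_transform(train=True)   # ToTensor + random horizontal flip
eval_tf = get_transform(train=False)   # ToTensor only
dataset = get_coco("./coco", image_set="train", transforms=train_tf)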
Example #5
def get_transform(args):

    melkwargs = {
        'n_mels': args.nfilt,
        'n_fft': args.nfft,
        'win_length': int(args.winlen * args.signal_sr),
        'hop_length': int(args.winstep * args.signal_sr),
    }
    args.signal_samples = int(args.signal_sr * args.signal_len)
    args.signal_width = 1 + args.signal_samples // melkwargs['hop_length']
    args.bkg_noise_path = 'background_noise'

    if args.features_name.lower() == 'mfccs':
        features = transforms.Compose([
            torchaudio.transforms.MFCC(sample_rate=args.signal_sr, n_mfcc=args.numcep,
                                       log_mels=True, melkwargs=melkwargs),
            transforms.Lambda(lambda x: x[1:]),  # drop the 0th (energy-like) coefficient
        args.nfeature = args.numcep - 1
    elif args.features_name.lower() == 'logfbes':
        log_offset = 1e-6
        features = transforms.Compose([
            torchaudio.transforms.MelSpectrogram(sample_rate=args.signal_sr, **melkwargs),
            transforms.Lambda(lambda t: torch.log(t + log_offset)),
        ])
        args.nfeature = args.nfilt
    else:
        raise ValueError('--features_name should be one of {LogFBEs | MFCCs}')

    def debug(x):
        # unused helper: wrap as transforms.Lambda(debug) to inspect tensor shapes
        print(x.size())
        return x

    # Composing transforms
    test_transform = transforms.Compose([
        features,
        transforms.Lambda(lambda x: x.unsqueeze(0)),
    ])

    silence_transform = transforms.Compose([
        transforms.Lambda(lambda x: x * random.uniform(0.0, args.silence_vol)),
        test_transform,
    ])

    if args.no_augmentations:
        train_transform = test_transform
    else:
        train_transform = transforms.Compose([
            transforms.TimeShifting2(shift_min=args.shift_min, shift_max=args.shift_max),
            transforms.AddNoise3(
                os.path.join(args.data_root, args.bkg_noise_path),
                args.noise_vol, args.signal_samples, args.signal_sr),
            test_transform,
            torchaudio.transforms.TimeMasking(args.mask_time),
            torchaudio.transforms.TimeMasking(args.mask_time),  # masked twice, SpecAugment-style
            torchaudio.transforms.FrequencyMasking(args.mask_freq),
        ])

    return {
        'train': train_transform,
        'val': test_transform,
        'test': test_transform,
    }, silence_transform
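
Usage mirrors the earlier variant; a brief sketch of the silence path, reusing the args namespace from the earlier sketch with no_augmentations=True added:

import torch

args.features_name = 'MFCCs'
args.no_augmentations = True                   # skip shift/noise/masking here
tf, silence_tf = get_transform(args)
noise_clip = torch.randn(args.signal_samples)  # stand-in background recording
silence_feat = silence_tf(noise_clip)          # randomly attenuated, then featurized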
Example #6
# NOTE: this excerpt begins inside a call, most likely logging.basicConfig(...).
    handlers=[
        logging.FileHandler(
            os.path.join(
                log_dir,
                f'{datetime.now().strftime("%Y%m%d-%H%M%S")}_{args.model}.log')
        ),
        logging.StreamHandler()
    ])
########## BASE TRANSFORMS ##########
base_transforms = T.Compose([
    T.DataCreator(),  # convert value-dictionary to a MultiNodeData instance
    T.DataLabeler(min_occurrence=0.5),  # label nodes based on tag occurrences
    T.NodeSelector(include_nodes=args.include_nodes),  # only use a subset of nodes if required
    T.MetricSelector(include_metrics=args.include_metrics),  # only use a subset of metrics if required
    T.AnomalySelector(exclude_anomalies=args.exclude_anomalies),  # filter anomalies if exclusions are specified
    T.NodeConnector(use_synthetic_data=use_synthetic_data),  # add adjacency matrix (sparse)
])
logging.info(f"Base transforms: {base_transforms}")
########## PREPARE DATA ##########
logging.info("Read dataset...")
data_root = args.data_root
dataset = MultiNodeDataset(data_root,
                           args.input_dimension,
                           args.stride,
                           use_synthetic_data=use_synthetic_data,
                           start_time=args.start_time,