Example #1
def main():
    """Main process"""
    args = parse_args()
    config = parse_yaml(args.config_path)["avel"]
    train_config = config["train"]
    model_config = config["model"]

    # If the audio and visual features have not been extracted yet,
    # extract them from the video files and save them.
    if args.extract_features:
        extract_feature(args.ave_root)

    model = DMRFE(
        128,     # audio feature dimension (assumed: VGGish-style embeddings)
        512,     # visual feature channel dimension (assumed: VGG-style conv maps)
        7 * 7,   # number of spatial positions in the visual feature map
        model_config["att_embed_dim"],
        model_config["lstm_hidden_dim"],
        model_config["lstm_num_layers"],
        model_config["target_size"],
    )

    # AVE training dataset.
    train_ds = AVELDataset(
        args.ave_root,
        args.train_annot,
        args.features_path,
        train_config["batch_size"],
        model_config["target_size"],
    )

    # AVE validation dataset.
    valid_ds = AVELDataset(
        args.ave_root,
        args.valid_annot,
        args.features_path,
        train_config["batch_size"],
        model_config["target_size"],
    )

    training = Training(
        model,
        train_ds,
        valid_ds,
        train_config["batch_size"],
        train_config["epoch"],
        train_config["learning_rate"],
        train_config["valid_span"],
        train_config["save_span"],
        train_config["save_dir"],
    )

    training.train()
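
For reference, here is a sketch of the structure that parse_yaml(args.config_path) is assumed to return for the code above. Only the keys actually read by main() are listed; every value is a placeholder, not taken from the original project.

EXAMPLE_CONFIG = {
    "avel": {
        "train": {
            "batch_size": 64,        # placeholder
            "epoch": 100,            # placeholder
            "learning_rate": 1e-3,   # placeholder
            "valid_span": 5,         # placeholder (assumed: validate every N epochs)
            "save_span": 10,         # placeholder (assumed: save a checkpoint every N epochs)
            "save_dir": "checkpoints/",  # placeholder
        },
        "model": {
            "att_embed_dim": 128,    # placeholder
            "lstm_hidden_dim": 128,  # placeholder
            "lstm_num_layers": 2,    # placeholder
            "target_size": 29,       # placeholder
        },
    },
}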
Example #2
def concat_features(file_list):
    """
    Extract features from audio files and then concate them into an array.
    """
    # calculate the number of dimensions
    n_features = []
    for file_id, file_name in enumerate(file_list):

        # extract feature from audio file.
        feature = util.extract_feature(file_name, CONFIG["feature"])
        feature = feature[:: CONFIG["feature"]["n_hop_frames"], :]

        if file_id == 0:
            # Preallocate, assuming every file yields the same number of frames
            # as the first one.
            features = numpy.zeros(
                (
                    len(file_list) * feature.shape[0],
                    CONFIG["feature"]["n_mels"] * CONFIG["feature"]["n_frames"],
                ),
                dtype=float,
            )

        features[
            feature.shape[0] * file_id : feature.shape[0] * (file_id + 1), :
        ] = feature

        n_features.append(feature.shape[0])

    return features, n_features
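
For context, a minimal usage sketch of concat_features; the glob pattern is a placeholder, and util and CONFIG are assumed to come from the surrounding project.

import glob

wav_files = sorted(glob.glob("dev_data/section_00/train/*.wav"))  # placeholder path
features, n_features = concat_features(wav_files)
# If every file yields the same number of frames, features.shape[0] == sum(n_features):
# one row per retained frame, stacked file by file.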
Example #3
    def __init__(self, files, transform=None):
        """Build the feature array from the given audio files."""
        self.transform = transform

        for file_id, file_name in enumerate(files):

            # shape = (#frames, #dims)
            features = util.extract_feature(file_name,
                                            config=CONFIG["feature"])
            features = features[::CONFIG["feature"]["n_hop_frames"], :]

            if file_id == 0:
                # shape = (#total frames over all audio files, #dim. of feature vector)
                dataset = numpy.zeros(
                    (
                        features.shape[0] * len(files),
                        CONFIG["feature"]["n_mels"] *
                        CONFIG["feature"]["n_frames"],
                    ),
                    numpy.float32,
                )

            dataset[features.shape[0] * file_id:
                    features.shape[0] * (file_id + 1), :] = features

        self.feat_data = dataset

        train_size = int(
            len(dataset) * (1.0 - CONFIG["training"]["validation_split"]))
        print("train_size: %d, val_size: %d" % (
            train_size,
            int(len(dataset) * CONFIG["training"]["validation_split"]),
        ))
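
Only __init__ is shown above. To use this class as a torch.utils.data.Dataset, a __len__/__getitem__ pair along the following lines would typically be added; this is a sketch under that assumption (returning the same vector as input and target for autoencoder-style training), not code from the original class.

    def __len__(self):
        # one sample per stored feature vector
        return len(self.feat_data)

    def __getitem__(self, index):
        sample = self.feat_data[index, :]
        if self.transform is not None:
            sample = self.transform(sample)
        # assumption: autoencoder-style training, so input and target coincide
        return sample, sample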
Example #4
def calc_anomaly_score(model, file_path):
    """
    Calculate anomaly score.
    """
    try:
        data = util.extract_feature(file_path, config=CONFIG["feature"])
    except FileNotFoundError:
        print("File broken!!: {}".format(file_path))
        raise  # `data` would be undefined below, so do not continue

    feed_data = torch.from_numpy(data).clone()
    feed_data = feed_data.to(DEVICE).float()  # .to() is not in-place, so re-assign
    with torch.no_grad():
        pred = model(feed_data)
        pred = pred.to("cpu").detach().numpy().copy()

    errors = np.mean(np.square(data - pred), axis=1)  # average over dim.

    return np.mean(errors)  # average over frames
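
A possible way this reconstruction-error score could be used, sketched under assumptions: model is a trained autoencoder that is already on DEVICE, and the glob pattern and threshold are placeholders.

import glob

THRESHOLD = 0.5  # placeholder; in practice chosen from the score distribution on normal clips
for path in sorted(glob.glob("dev_data/section_00/test/*.wav")):  # placeholder path
    score = calc_anomaly_score(model, path)  # model: trained autoencoder (assumed available)
    print("{}\t{:.4f}\t{}".format(path, score, "anomaly" if score > THRESHOLD else "normal"))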
Example #5
def calc_anomaly_score(model, file_path, section_index):
    """
    Calculate anomaly score.
    """
    try:
        # extract features (log-mel spectrogram)
        data = util.extract_feature(file_name=file_path, config=CONFIG["feature"])
        data = data.reshape(
            (  # must be a tuple of ints
                data.shape[0],
                1,
                CONFIG["feature"]["n_frames"],
                CONFIG["feature"]["n_mels"],
            )
        )
    except FileNotFoundError:
        print("File broken!!: {}".format(file_path))
        raise  # `data` would be undefined below, so do not continue

    # Section label for every frame (not used by the model call in this snippet).
    condition = numpy.zeros((data.shape[0]), dtype=int)
    if section_index != -1:
        condition[:] = section_index

    feed_data = torch.from_numpy(data).clone()
    feed_data = feed_data.to(DEVICE).float()
    with torch.no_grad():
        output = model(feed_data)  # notice: unnormalized output
        output = output.to("cpu").detach().numpy().copy()  # tensor to numpy array.

    output = softmax(output, axis=1)
    # probability that each frame belongs to the claimed section
    prob = output[:, section_index]

    # Anomaly score: mean log-odds log((1 - p) / p) over frames,
    # clamped with machine epsilon to avoid log(0).
    y_pred = numpy.mean(
        numpy.log(numpy.maximum(1.0 - prob, sys.float_info.epsilon))
        - numpy.log(numpy.maximum(prob, sys.float_info.epsilon))
    )

    return y_pred
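
To make the sign convention concrete, here is a quick numerical illustration of the per-frame score log((1 - p) / p) used above (not from the original code): a frame the classifier confidently assigns to its own section scores low, while a frame it rejects scores high.

import sys
import numpy

for p in (0.9, 0.1):
    s = numpy.log(max(1.0 - p, sys.float_info.epsilon)) - numpy.log(max(p, sys.float_info.epsilon))
    print(p, round(float(s), 2))  # 0.9 -> -2.2 (looks normal), 0.1 -> 2.2 (looks anomalous)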