Пример #1
0
def processDatasetSplit(train_source_path, test_source_path, logger=None):
    """Preprocess a training directory and a test directory separately.

    Both paths are handed to ``preprocessWavs.preprocess_dataset`` with the
    module-level ``nbMFCCs`` and ``debug_size`` settings.

    Args:
        train_source_path: Location of the training data.
        test_source_path: Location of the test data.
        logger: Logger used for progress messages and forwarded to the
            preprocessing helper. When omitted, a module-level logger is
            used (previously the default ``None`` raised AttributeError).

    Returns:
        A 6-tuple ``(X_training, y_training, valid_frames_training,
        X_test, y_test, valid_frames_test)`` exactly as produced by the two
        ``preprocess_dataset`` calls.
    """
    if logger is None:
        # The signature advertises logger as optional, so make it so.
        import logging
        logger = logging.getLogger(__name__)

    logger.info('  Training data: %s ', train_source_path)
    X_training, y_training, valid_frames_training = preprocessWavs.preprocess_dataset(
        source_path=train_source_path,
        logger=logger,
        nbMFCCs=nbMFCCs,
        debug=debug_size)

    logger.info('  Test data: %s', test_source_path)
    X_test, y_test, valid_frames_test = preprocessWavs.preprocess_dataset(
        source_path=test_source_path,
        logger=logger,
        nbMFCCs=nbMFCCs,
        debug=debug_size)

    return X_training, y_training, valid_frames_training, X_test, y_test, valid_frames_test
Пример #2
0
def processDataset(FRAC_TRAINING, data_source_path, logger=None):
    """Load one dataset and randomly split it into training and test parts.

    The data is loaded with ``preprocessWavs.preprocess_dataset`` (using the
    module-level ``nbMFCCs`` and ``debug_size``). ``ceil(FRAC_TRAINING * N)``
    items are kept for training and the remaining items form the test set;
    test items are drawn uniformly at random from the *whole* dataset.

    When the module-level ``DEBUG`` flag is truthy, the first items (index 0
    and 1, as far as the test set size allows) are always placed in the test
    set so debug runs are reproducible on those items.

    Args:
        FRAC_TRAINING: Fraction of the data kept for training, in [0, 1].
        data_source_path: Location of the data to load.
        logger: Logger used for progress messages and forwarded to the
            preprocessing helper. When omitted, a module-level logger is
            used (previously the default ``None`` raised AttributeError).

    Returns:
        A 6-tuple of lists ``(X_training, y_training, valid_frames_training,
        X_test, y_test, valid_frames_test)``. Items keep their original
        relative order within each part.

    Raises:
        ValueError: If ``FRAC_TRAINING`` is outside [0, 1].
    """
    if logger is None:
        # The signature advertises logger as optional, so make it so.
        import logging
        logger = logging.getLogger(__name__)

    # Fail fast instead of letting random.sample() raise a less helpful
    # ValueError after the (potentially slow) preprocessing has run.
    if not 0 <= FRAC_TRAINING <= 1:
        raise ValueError(
            'FRAC_TRAINING must be within [0, 1], got {!r}'.format(FRAC_TRAINING))

    logger.info('  Data: %s ', data_source_path)
    X_all, y_all, valid_frames_all = preprocessWavs.preprocess_dataset(
        source_path=data_source_path,
        nbMFCCs=nbMFCCs,
        logger=logger,
        debug=debug_size)
    assert len(X_all) == len(y_all) == len(valid_frames_all)

    logger.info(' Loading data complete.')
    logger.debug('Type and shape/len of X_all')
    logger.debug('type(X_all): {}'.format(type(X_all)))
    logger.debug('type(X_all[0]): {}'.format(type(X_all[0])))
    logger.debug('type(X_all[0][0]): {}'.format(type(X_all[0][0])))
    logger.debug('type(X_all[0][0][0]): {}'.format(type(X_all[0][0][0])))
    logger.info('Creating Validation index ...')

    total_size = len(X_all)  # TOTAL = TRAINING + TEST = TRAIN + VAL + TEST
    total_training_size = int(math.ceil(FRAC_TRAINING *
                                        total_size))  # TRAINING = TRAIN + VAL
    test_size = total_size - total_training_size

    # Split off a 'test' dataset.
    # In DEBUG mode the first items are pinned into the test set. They are
    # excluded from the random draw so the index set never contains
    # duplicates and always has exactly test_size members.
    pinned = list(range(min(2, test_size))) if DEBUG else []
    # Draw from the entire dataset (not just the first total_training_size
    # items), otherwise trailing items could never become test items and
    # the draw fails whenever the test set is the larger part.
    drawn = random.sample(range(len(pinned), total_size),
                          test_size - len(pinned))
    test_idx = set(pinned)
    test_idx.update(drawn)  # set -> O(1) membership checks in the loop below

    logger.info('Separating test and training set ...')
    X_training = []
    y_training = []
    valid_frames_training = []
    X_test = []
    y_test = []
    valid_frames_test = []
    for i, (x_item, y_item, frames_item) in enumerate(
            zip(X_all, y_all, valid_frames_all)):
        if i in test_idx:
            X_test.append(x_item)
            y_test.append(y_item)
            valid_frames_test.append(frames_item)
        else:
            X_training.append(x_item)
            y_training.append(y_item)
            valid_frames_training.append(frames_item)

    assert len(X_test) == test_size
    assert len(X_training) == total_training_size

    return X_training, y_training, valid_frames_training, X_test, y_test, valid_frames_test
    def processDataset(FRAC_TRAINING, data_source_path, logger=None):
        """Load a dataset that is used entirely as test data.

        The data is loaded with ``preprocessWavs.preprocess_dataset`` using
        the module-level ``nbMFCCs`` and ``debug=None``. No train/test split
        is performed in this variant.

        Args:
            FRAC_TRAINING: Unused here; presumably kept so the signature
                matches the splitting variant of this function.
            data_source_path: Location of the data to load.
            logger: Logger used for progress messages and forwarded to the
                preprocessing helper. When omitted, a module-level logger is
                used (previously the default ``None`` raised AttributeError).

        Returns:
            The tuple ``(X_test, y_test, valid_frames_test)`` as returned by
            ``preprocess_dataset``; the three parts have equal length.
        """
        if logger is None:
            # The signature advertises logger as optional, so make it so.
            import logging
            logger = logging.getLogger(__name__)

        logger.info('  Data: %s ', data_source_path)
        X_test, y_test, valid_frames_test = preprocessWavs.preprocess_dataset(
            source_path=data_source_path,
            nbMFCCs=nbMFCCs,
            logger=logger,
            debug=None)
        assert len(X_test) == len(y_test) == len(valid_frames_test)

        logger.info(' Loading data complete.')
        logger.debug('Type and shape/len of X_test')
        logger.debug('type(X_test): {}'.format(type(X_test)))
        logger.debug('type(X_test[0]): {}'.format(type(X_test[0])))
        logger.debug('type(X_test[0][0]): {}'.format(type(X_test[0][0])))
        logger.debug('type(X_test[0][0][0]): {}'.format(type(X_test[0][0][0])))

        return X_test, y_test, valid_frames_test
            def preprocessLabeledWavs(wavDir, store_dir, name):
                """Preprocess the labeled wav files in ``wavDir`` and cast the results.

                The wavs are assumed to have been fixed beforehand. The data is
                loaded through ``preprocessWavs.preprocess_dataset`` (with the
                module-level ``nbMFCCs`` and ``logger_evaluate``) and each part
                is then converted with ``preprocessWavs.set_type``: inputs to
                'float32', labels and valid-frame data to 'int32'.

                ``store_dir`` and ``name`` are accepted but not used by this
                function; nothing is written to disk here.

                Returns:
                    The tuple ``(X, y, valid_frames)`` after type conversion.
                """
                inputs, labels, frames = preprocessWavs.preprocess_dataset(
                    source_path=wavDir,
                    nbMFCCs=nbMFCCs,
                    logger=logger_evaluate)

                # One target dtype per returned part, in return order.
                parts_with_dtype = (
                    (inputs, 'float32'),
                    (labels, 'int32'),
                    (frames, 'int32'),
                )
                return tuple(
                    preprocessWavs.set_type(part, dtype)
                    for part, dtype in parts_with_dtype)