def test_sanity_checks():
    """Ensure that sanity checks are performed, and as expected."""

    ### -------------- as you add them to dataset --------------
    with raises(EmptyFeatureSetException):
        ds.add_samplet('empty_features', [], 'target')

    ### -------------- as you save them to disk --------------

    ds.add_samplet('all_zeros', np.zeros((ds.num_features, 1)), 'target')
    with raises(ConstantValuesException):
        ds.save(out_file)

    ds.del_samplet('all_zeros')

    # check that a random constant value is caught
    const_value = np.random.randint(10, 100)
    const_feat_set = np.full((ds.num_features, 1), const_value)
    ds.add_samplet('all_constant', const_feat_set, 'target')
    with raises(ConstantValuesException):
        ds.save(out_file)

    # now checking for constants across samplets,
    #   easily simulated by adding different samplets with the same features.
    #   Such a bug can occur when the user queries the wrong files
    #   for a given samplet ID.
    const_ds = ClfDataset()
    rand_feat_same_across_samplets = np.random.randn(10)
    for index in range(np.random.randint(10, 100)):
        const_ds.add_samplet(str(index), rand_feat_same_across_samplets, index)

    with raises(ConstantValuesException):
        const_ds.save(out_file)
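
# ---- minimal setup sketch for the fixtures the test above assumes ----
# (import paths and names here are assumptions, not part of the original)
import numpy as np
from pytest import raises
from pyradigm import ClassificationDataset as ClfDataset
from pyradigm.base import ConstantValuesException, EmptyFeatureSetException

out_file = 'sanity_test_ds.pkl'  # hypothetical output path
ds = ClfDataset()
ds.add_samplet('first', np.random.randn(10), 'target')  # sets ds.num_features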
Example #2
def make_random_Dataset(max_num_classes=20,
                        max_class_size=50,
                        max_dim=100,
                        stratified=True):
    "Generates a random Dataset for use in testing."

    smallest = 10
    max_class_size = max(smallest, max_class_size)
    largest = max(50, max_class_size)  # guaranteed to exceed smallest + 3

    if max_num_classes != 2:
        # without a size argument, randint returns a plain int
        num_classes = np.random.randint(2, max_num_classes)
    else:
        num_classes = 2
    if not stratified:
        # np.random.random_integers is deprecated in numpy;
        # randint's upper bound is exclusive, hence largest + 1
        class_sizes = np.random.randint(smallest, largest + 1,
                                        size=num_classes)
    else:
        class_sizes = np.repeat(np.random.randint(smallest, largest),
                                num_classes)

    # upper bound padded to avoid randint(3, 3) when max_dim <= 3
    num_features = np.random.randint(min(3, max_dim), max(4, max_dim))
    feat_names = [str(x) for x in range(num_features)]

    class_ids = ['class-{}'.format(cl) for cl in range(num_classes)]
    labels = list(range(num_classes))

    ds = ClfDataset()
    for cc, class_ in enumerate(class_ids):
        subids = [
            'sub{:03}-class{:03}'.format(ix, cc)
            for ix in range(class_sizes[cc])
        ]
        for sid in subids:
            # feat_generator: random feature-vector maker, assumed to be
            #   defined elsewhere in this test module
            ds.add_samplet(samplet_id=sid,
                           features=feat_generator(num_features),
                           target=class_,
                           feature_names=feat_names)

    return ds
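
# usage sketch: draw a small stratified dataset and inspect it
# (num_samplets / num_features are pyradigm properties; names assumed)
rand_ds = make_random_Dataset(max_num_classes=5, max_class_size=30, max_dim=20)
print(rand_ds.num_samplets, rand_ds.num_features)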
Example #3
def make_fully_separable_classes(max_class_size=50, max_dim=100):
    """Generates two well-separated Gaussian blobs as a ClfDataset."""
    from sklearn.datasets import make_blobs

    random_center = np.random.rand(max_dim)
    cluster_std = 1.5
    # centers six cluster_stds apart guarantee the two classes do not overlap
    centers = [random_center, random_center + cluster_std * 6]
    # note: make_blobs splits n_samples evenly across the two centers
    blobs_X, blobs_y = make_blobs(n_samples=max_class_size, n_features=max_dim,
                                  centers=centers, cluster_std=cluster_std)

    unique_labels = np.unique(blobs_y)
    class_ids = {lbl: str(lbl) for lbl in unique_labels}

    new_ds = ClfDataset()
    for index, row in enumerate(blobs_X):
        new_ds.add_samplet(samplet_id='sub{}'.format(index),
                           features=row,
                           target=class_ids[blobs_y[index]])

    return new_ds
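
# usage sketch: with centers six cluster_stds apart, even a linear
# classifier should reach near-perfect accuracy on this dataset
sep_ds = make_fully_separable_classes(max_class_size=40, max_dim=10)
print(sep_ds.target_set)  # assumed pyradigm property: the unique targets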
Example #4
def load_arff_dataset(ds_path):
    """Convenience utility to quickly load ARFF files into pyradigm format"""

    try:
        ds = ClassificationDataset.from_arff(ds_path)
    except Exception:
        try:
            ds = RegressionDataset.from_arff(ds_path)
        except Exception:
            try:
                ds = MLDataset(arff_path=ds_path)
            except Exception:
                raise TypeError(
                    'Error in loading the ARFF dataset @ path below!'
                    ' Ignoring {}'.format(ds_path))

    return ds
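
# usage sketch (path is hypothetical): the fallback chain above tries
# ClassificationDataset first, then RegressionDataset, then the
# deprecated MLDataset
# ds = load_arff_dataset('/tmp/dataset.arff')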
Example #5
def load_dataset(ds_path):
    """Convenience utility to quickly load any type of pyradigm dataset"""

    try:
        ds = ClassificationDataset(dataset_path=ds_path)
    except Exception:
        try:
            ds = RegressionDataset(dataset_path=ds_path)
        except Exception:
            try:
                warn(
                    'MLDataset is deprecated. Switch to the latest pyradigm '
                    'data structures such as ClassificationDataset or '
                    'RegressionDataset as soon as possible.')
                ds = MLDataset(filepath=ds_path)
            except Exception:
                raise TypeError('Dataset class @ path below not recognized!'
                                ' Must be a valid instance of one of '
                                'ClassificationDataset or '
                                'RegressionDataset or MLDataset.\n'
                                ' Ignoring {}'.format(ds_path))

    return ds
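
# usage sketch (path is hypothetical); same fallback order as the ARFF
# loader: classification, then regression, then the deprecated MLDataset
# ds = load_dataset('/tmp/some_pyradigm_ds.pkl')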
Example #6
def get_features(samplet_id_list,
                 classes,
                 featdir,
                 outdir,
                 outname,
                 get_method=None,
                 feature_type='dir_of_dirs'):
    """
    Populates the pyradigm data structure with features from a given method.

    Parameters
    ----------
    samplet_id_list : list or ndarray
        List of subject IDs
    classes : dict
        dict of class labels keyed in by subject id
    featdir : str
        Path to input directory to read the features from
    outdir : str
        Path to output directory to save the gathered features to.
    outname : str
        Name of the feature set
    get_method : callable
        Callable that takes in a path and returns a vectorized feature set
        e.g. set of subcortical volumes, with an optional array of names for each
        feature.
    feature_type : str
        Identifier of data organization for features.

    Returns
    -------
    saved_path : str
        Path where the features have been saved to as a pyradigm dataset

    """

    if not callable(get_method):
        raise ValueError('Supplied get_method is not callable! '
                         'It must take in a path and '
                         'return a vectorized feature set and labels.')

    # assign a unique numeric label to each class
    # (note: Python sets are unordered, so these labels need not follow
    #  the classes' order of appearance in the metadata file)
    class_labels = {cls: idx
                    for idx, cls in enumerate(set(classes.values()))}

    ids_excluded = list()

    if feature_type == 'data_matrix':
        data_matrix = get_data_matrix(featdir)

    ds = ClassificationDataset()
    for row_index, samplet_id in enumerate(samplet_id_list):
        try:
            if feature_type == 'data_matrix':
                # enumerate avoids list.index(), which fails for ndarray input
                data = data_matrix[row_index, :]
                feat_names = None
            else:
                data, feat_names = get_method(featdir, samplet_id)

            ds.add_samplet(samplet_id=samplet_id,
                           features=data,
                           target=classes[samplet_id],
                           feature_names=feat_names)
        except Exception:
            ids_excluded.append(samplet_id)
            traceback.print_exc()
            warnings.warn(
                "Features for {} via {} method could not be read or added."
                " Excluding it.".format(samplet_id, get_method.__name__))

    # warn if feature extraction failed for even one subject
    alert_failed_feature_extraction(len(ids_excluded), ds.num_samplets,
                                    len(samplet_id_list))

    # save the dataset to disk so multiple datasets can be passed around by path
    saved_path = realpath(pjoin(outdir, outname))
    try:
        ds.save(saved_path)
    except IOError as ioe:
        print('Unable to save {} features to disk in folder:\n{}'
              ''.format(outname, outdir))
        raise ioe

    return saved_path
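
# sketch of a get_method callable returning (features, feature_names);
# all paths, file layout and names below are hypothetical
def read_subcortical_volumes(featdir, samplet_id):
    """Loads one subject's vectorized features from a per-subject CSV."""
    csv_path = pjoin(featdir, samplet_id, 'volumes.csv')
    data = np.loadtxt(csv_path, delimiter=',')
    return data, None  # None lets the dataset auto-name the features

# saved = get_features(id_list, class_dict, '/in/features', '/out/dir',
#                      'volumes', get_method=read_subcortical_volumes)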
Example #7
estimator = 'randomforestclassifier'  # alternative: 'svm'
dr_method = 'isomap'  # alternatives: 'selectkbest_f_classif', 'variancethreshold'
dr_size = 'tenth'
gs_level = 'none'  # alternative: 'light'

random.seed(42)  # fixed seed keeps local test runs fast and reproducible

covar_list = ('age', 'gender', 'dummy')
covar_types = ('age', 'gender', 'float')
covar_arg = ' '.join(['age', 'gender'])
deconf_method = 'residualize'
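
# sketch of what 'residualize' deconfounding amounts to conceptually
# (illustrative only; the actual pipeline implements its own version):
# regress the covariates out of each feature and keep the residuals
from sklearn.linear_model import LinearRegression

def residualize(X, confounds):
    """Removes the linear effect of confounds from each column of X."""
    lin = LinearRegression().fit(confounds, X)
    return X - lin.predict(confounds)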

out_path1 = os.path.join(out_dir, 'random_clf_ds1.pkl')
out_path2 = os.path.join(out_dir, 'random_clf_ds2.pkl')
if pexists(out_path1) and pexists(out_path2):
    ds_one = ClassificationDataset(dataset_path=out_path1)
    ds_two = ClassificationDataset(dataset_path=out_path2)
else:
    ds_one = make_random_ClfDataset(max_num_classes=max_num_classes,
                                    stratified=True,
                                    max_class_size=max_class_size,
                                    max_dim=max_dim,
                                    min_num_classes=min_num_classes,
                                    attr_names=covar_list,
                                    attr_types=covar_types)
    ds_one.save(out_path1)

    ds_two = dataset_with_new_features_same_everything_else(ds_one, max_dim)
    ds_two.save(out_path2)
