Exemplo n.º 1
    def _fp(self, X):
        """The cluster workhorse.

        Clusters the features (columns) of X with ``self.estimator`` and
        averages the columns that share a cluster label.

        Parameters
        ----------
        X : 2D array-like (n_sample, n_feature)
            The data to decompose

        Returns
        -------
        Xc : 2D array-like (n_sample, n_clusters)
            One column per unique cluster label, holding the mean of
            the features assigned to that label.
        """

        nrow = X.shape[0]

        # Features (not samples) are clustered, so the estimator is
        # fit on the transpose of X.
        clabels = self.estimator.fit_predict(X.transpose())

        # NOTE(review): unique_nan / sort_nanfirst are defined elsewhere;
        # presumably they give the unique labels with NaN sorted
        # first -- confirm.
        uclabels = sort_nanfirst(unique_nan(clabels))

        # Average cluster examples, filling Xc
        Xc = np.zeros((nrow, len(uclabels)))  ## Init w/ 0
        for i, ucl in enumerate(uclabels):
            Xc[:, i] = X[:, ucl == clabels].mean(1)

        assert checkX(Xc)
        assert Xc.shape[0] == X.shape[0], ("After transform wrong row number")
        assert Xc.shape[1] == len(uclabels), ("Afer transform"
            " wrong col number")

        return Xc
Exemplo n.º 2
    def _ft(self, X):
        """The decompose workhorse.

        Parameters
        ----------
        X : 2D array-like (n_sample, n_feature)
            The data to decompose

        Returns
        -------
        Xc : 2D array-like (n_sample, n_components)
            The output of ``self.estimator.fit_transform(X)``.
        """

        Xc = self.estimator.fit_transform(X)

        assert checkX(Xc)
        assert Xc.shape[0] == X.shape[0], ("After transform wrong row number")

        # The n_components attr is optional, estimators without it
        # simply skip the column-count check.
        # NOTE(review): the tail of this assertion message was lost in
        # the source; the wording after "Too many" is reconstructed.
        try:
            assert Xc.shape[1] <= self.estimator.n_components, ("Too many"
                " components")
        except AttributeError:
            pass

        return Xc
Exemplo n.º 3
def eva(X, y, trial_index, window, tr):
    """Average trials for each feature in X

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to decompose
    y : 1D array or None
        Sample labels for the data.  In y, np.nan and 'nan' values
        are ignored.
    trial_index : 1D array (n_sample, )
        Each unique entry should match a trial.
    window : int
        Trial length
    tr : unused
        Not read by this function; kept so the signature is unchanged.

    Returns
    -------
    Xeva : 2D array (window, n_feature * n_unique_y)
        The average trials
    eva_names : 1D array
        The names of the features (taken from y), one per column
        of Xeva
    """

    evas = []
    eva_names = []
    scaler = MinMaxScaler(feature_range=(0, 1))
    for j in range(X.shape[1]):
        xj = X[:, j][:, np.newaxis]  ## Need 2D

        # Each feature into trials, rescale too.
        # (np.float was only an alias of the builtin float and no
        # longer exists in current NumPy.)
        Xtrial, feature_names = by_trial(xj, trial_index, window, y)
        Xtrial = scaler.fit_transform(Xtrial.astype(float))
        unique_fn = sorted(np.unique(feature_names))
        unique_fn = unique_sorted_with_nan(unique_fn)

        # and again by unique_y/feature_names
        Xlabels, _ = by_labels(X=Xtrial.transpose(), y=feature_names)

        # Average the trials of every label, then record the matching
        # names so eva_names lines up with the columns of Xeva.
        # NOTE(review): assumes by_labels returns its groups in the
        # same order as unique_fn -- confirm.
        evas.extend([Xl.transpose().mean(axis=1) for Xl in Xlabels])
        eva_names.extend(unique_fn)

    # Reshape : (window, len(unique_y)*n_features)
    Xeva = np.vstack(evas).transpose()
    eva_names = np.asarray(eva_names)

    assert checkX(Xeva)
    assert Xeva.shape[0] == window, ("After EVA rows not equal to window")
    assert Xeva.shape[1] == len(unique_fn) * X.shape[1], ("After"
        "EVA wrong number of features")
    assert eva_names.shape[0] == Xeva.shape[1], ("eva_names and Xeva"
        "don't match")

    return Xeva, eva_names
Exemplo n.º 4
def load_nii(nifiti, clean=True, sparse=False, smooth=False, **kwargs):
    """Convert the nifiti-1 file into a 2D array (n_sample x n_features).

    Parameters
    ----------
    nifiti : str
        The name of the data to load
    clean : boolean (True)
        Remove invariant features?  If used n_features will
        not match n_voxels in the original nifti1 file.  This operation
        is not reversible.  If you clean there is probably little
        point in converting to a sparse representation.
    sparse : boolean (False)
        Use the (CSC) sparse format (True)?
    smooth : boolean (False)
        High/low pass filter the data?
    **kwargs
        Optional parameters for smooth: tr, ub and lb
        (defaults: tr=1.5, ub=0.06, lb=0.001).
        NOTE(review): older docs listed lb=0.006, the code uses 0.001.

    Returns
    -------
    X : 2D array (n_sample x n_features)
        The BOLD data, cast to int16 before any cleaning/smoothing.
    """

    # Data is 4d (x,y,z,t) we want 2d, where each column is
    # a voxel and each row is the temporal (t) data
    nii = nb.nifti1.load(nifiti)

    numt = nii.shape[3]
    numxyz = nii.shape[0] * nii.shape[1] * nii.shape[2]
    dims = (numxyz, numt)

    # Flatten to (x*y*z, t) and transpose into (n_sample, n_feature)
    X = nii.get_data().astype('int16').reshape(dims).transpose()
    if clean:
        X = remove_invariant_features(X, sparse=False)

    if smooth:
        # Setup smooth params; callers may override any of them.
        tr = kwargs.get("tr", 1.5)
        ub = kwargs.get("ub", 0.06)
        # A user supplied "lb" used to overwrite ub and leave lb at
        # its default; it now sets the lower bound as intended.
        lb = kwargs.get("lb", 0.001)

        X = smoothfn(X, tr=tr, ub=ub, lb=lb)

    assert checkX(X)

    if sparse:
        X = csc_matrix(X)

    return X
Exemplo n.º 5
def load_nii(nifiti, clean=True, sparse=False, smooth=False, **kwargs):
    """Convert the nifiti-1 file into a 2D array (n_sample x n_features).

    Parameters
    ----------
    nifiti : str
        The name of the data to load
    clean : boolean (True)
        Remove invariant features?  If used n_features will
        not match n_voxels in the original nifti1 file.  This operation
        is not reversible.  If you clean there is probably little
        point in converting to a sparse representation.
    sparse : boolean (False)
        Use the (CSC) sparse format (True)?
    smooth : boolean (False)
        High/low pass filter the data?
    **kwargs
        Optional parameters for smooth: tr, ub and lb
        (defaults: tr=1.5, ub=0.06, lb=0.001).
        NOTE(review): older docs listed lb=0.006, the code uses 0.001.

    Returns
    -------
    X : 2D array (n_sample x n_features)
        The BOLD data, cast to int16 before any cleaning/smoothing.
    """
    # Data is 4d (x,y,z,t) we want 2d, where each column is
    # a voxel and each row is the temporal (t) data
    nii = nb.nifti1.load(nifiti)

    numt = nii.shape[3]
    numxyz = nii.shape[0] * nii.shape[1] * nii.shape[2]
    dims = (numxyz, numt)
    # Flatten to (x*y*z, t) and transpose into (n_sample, n_feature)
    X = nii.get_data().astype('int16').reshape(dims).transpose()
    if clean:
        X = remove_invariant_features(X, sparse=False)
    if smooth:
        # Defaults for the smoother, each one overridable via kwargs.
        params = {"tr": 1.5, "ub": 0.06, "lb": 0.001}
        for key in params:
            # "lb" used to be written into ub by mistake; every key
            # now updates only its own parameter.
            if key in kwargs:
                params[key] = kwargs[key]
        X = smoothfn(X, tr=params["tr"], ub=params["ub"], lb=params["lb"])
    assert checkX(X)
    if sparse:
        X = csc_matrix(X)

    return X
Exemplo n.º 6
def correlateX(X, y, corr="spearman"):
    """Correlate each feature in X, with y (some set of dummy
    coded labels).

    Correlations are calculated using either pearson's r (which
    assumes Gaussian errors) or spearman's rho (a rank-based
    non-parametric method.)

    Parameters
    ----------
    X : a 2d col oriented array of features
    y : a 1d array of labels
    corr : name of correlation function:
        'pearson' or 'spearman'

    Returns
    -------
    corrs : a 1d array of correlations, one per column of X
    ps : a 1d array of p-values, one per column of X

    Raises
    ------
    ValueError
        If corr is neither 'pearson' nor 'spearman'.
    """
    X = np.array(X)
    y = np.array(y)
        ## Force... just in case

    if corr == "pearson":
        corrf = pearsonr
    elif corr == "spearman":
        corrf = spearmanr
    else:
        # Without this else the raise ran for "spearman" as well.
        raise ValueError("stat was not valid.")

    corrs = []
    ps = []
    for jj in range(X.shape[1]):
        r, p = corrf(X[:,jj], y)
        # Keep every column's result; they used to be discarded.
        corrs.append(r)
        ps.append(p)

    return np.array(corrs), np.array(ps)
Exemplo n.º 7
def correlateX(X, y, corr="spearman"):
    """Correlate each feature in X, with y (some set of dummy
    coded labels).

    Correlations are calculated using either pearson's r (which
    assumes Gaussian errors) or spearman's rho (a rank-based
    non-parametric method.)

    Parameters
    ----------
    X : a 2d col oriented array of features
    y : a 1d array of labels
    corr : name of correlation function:
        'pearson' or 'spearman'

    Returns
    -------
    corrs : a 1d array of correlations, one per column of X
    ps : a 1d array of p-values, one per column of X

    Raises
    ------
    ValueError
        If corr is neither 'pearson' nor 'spearman'.
    """

    X = np.array(X)
    y = np.array(y)
    ## Force... just in case

    # Name -> correlation function; anything else is rejected.  The
    # raise used to sit inside the "spearman" branch, so it always fired.
    known = {"pearson": pearsonr, "spearman": spearmanr}
    if corr not in known:
        raise ValueError("stat was not valid.")
    corrf = known[corr]

    corrs = []
    ps = []
    for jj in range(X.shape[1]):
        r, p = corrf(X[:, jj], y)
        # Collect the per-column result (previously thrown away).
        corrs.append(r)
        ps.append(p)

    return np.array(corrs), np.array(ps)
Exemplo n.º 8
def by_trial(X, trial_index, window, y):
    """Reshapes X so each trial is a feature.

    In y, np.nan and 'nan' values are ignored.

    Parameters
    ----------
    X : 2D array (n_sample, n_feature)
        The data to reshape
    trial_index : 1D array (n_sample, )
        Trial membership of every row of X; NaN entries are skipped.
    window : int
        Number of rows kept from each trial
    y : 1D array or None
        Sample labels.  The first label of a trial names that trial;
        when y is None every trial is named 0.

    Returns
    -------
    Xtrial : 2D array (window, n_kept_trials * n_feature)
        The kept trials stacked side by side
    feature_names : 1D array
        One name per column of Xtrial
    """

    ncol = X.shape[1]

    # ----
    # Remove short trials from X.
    # NOTE(review): locate_short_trials is defined elsewhere; presumably
    # it lists the trials with fewer than `window` samples -- confirm.
    locations = locate_short_trials(trial_index, window)
    if len(locations) > 0:
        # Build the mask without pop(), so the helper's list is not
        # modified behind its back.
        short_mask = np.zeros(trial_index.shape, dtype=bool)
        for i in locations:
            short_mask = short_mask | (i == trial_index)
        short_mask = np.logical_not(short_mask)
        X = X[short_mask,]
        trial_index = trial_index[short_mask]

    # ----
    # Find all the trials
    trial_masks = []
    for trial in np.unique(trial_index):
        if np.isnan(trial): continue
        trial_masks.append(trial == trial_index)

    # And split up X
    Xlist = []
    feature_names = []
    for mask in trial_masks:
        y0 = 0
        if y is not None:
            y0 = y[mask][0]

        # np.str was an alias of the builtin str (gone in new NumPy).
        if str(y0) != 'nan':
            # Keep the first `window` rows of the trial; without this
            # Xlist stayed empty and the hstack below failed.
            Xlist.append(X[mask,][0:window,])
            feature_names.append(np.repeat(y0, ncol))
    feature_names = np.hstack(feature_names)

    # Create Xtrial by horizontal stacking
    Xtrial = np.hstack(Xlist)

    # Sanity
    assert checkX(Xtrial)
    assert Xtrial.shape[1] == feature_names.shape[0], ("After reshape"
        "Xtrial and feature_names don't match")
    assert Xtrial.shape[0] == window, ("Number of samples in Xtrial"
        "doesn't match window")
    return Xtrial, feature_names
Exemplo n.º 9
def by_trial(X, trial_index, window, y):
    """Reshapes X so each trial is a feature.

    In y, np.nan and 'nan' values are ignored.

    Parameters
    ----------
    X : 2D array (n_sample, n_feature)
        The data to reshape
    trial_index : 1D array (n_sample, )
        Trial membership of every row of X; NaN entries are skipped.
    window : int
        Number of rows kept from each trial
    y : 1D array or None
        Sample labels.  The first label of a trial names that trial;
        when y is None every trial is named 0.

    Returns
    -------
    Xtrial : 2D array (window, n_kept_trials * n_feature)
        The kept trials stacked side by side
    feature_names : 1D array
        One name per column of Xtrial
    """

    ncol = X.shape[1]

    # ----
    # Remove short trials from X.
    # NOTE(review): locate_short_trials is defined elsewhere; presumably
    # it lists the trials with fewer than `window` samples -- confirm.
    locations = locate_short_trials(trial_index, window)
    if len(locations) > 0:
        # Start from an all-False mask instead of pop()-ing the first
        # entry, which modified the list returned by the helper.
        short_mask = np.zeros(trial_index.shape, dtype=bool)
        for i in locations:
            short_mask = short_mask | (i == trial_index)
        short_mask = np.logical_not(short_mask)

        X = X[short_mask, ]
        trial_index = trial_index[short_mask]

    # ----
    # Find all the trials
    trial_masks = [
        trial == trial_index
        for trial in np.unique(trial_index)
        if not np.isnan(trial)
    ]

    # And split up X
    Xlist = []
    feature_names = []
    for mask in trial_masks:
        y0 = 0
        if y is not None:
            y0 = y[mask][0]

        # np.str was an alias of the builtin str (gone in new NumPy).
        if str(y0) != 'nan':
            Xlist.append(X[mask, ][0:window, ])
            feature_names.append(np.repeat(y0, ncol))

    feature_names = np.hstack(feature_names)

    # Create Xtrial by horizontal stacking
    Xtrial = np.hstack(Xlist)

    # Sanity
    assert checkX(Xtrial)
    assert Xtrial.shape[1] == feature_names.shape[0], (
        "After reshape"
        "Xtrial and feature_names don't match")
    assert Xtrial.shape[0] == window, ("Number of samples in Xtrial"
                                       "doesn't match window")

    return Xtrial, feature_names
Exemplo n.º 10
def restack(X, feature_names):
    """Reshape X into a stack of matrices based on feature names,
    one new 'layer' for each unique (sorted) entry in feature_names.

    In order to ensure the layers are stackable, if the number of cols
    is off, columns are added as padding (via _addcol) wherever needed.

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to restack
    feature_names : 1D array-like (n_feature, )
        The name of every column of X

    Returns
    -------
    Xstack : 2D array
        The layers stacked vertically, n_sample rows per unique name
    fn_stack : 1D array
        The layer name of every row of Xstack

    Raises
    ------
    ValueError
        If X and feature_names disagree on the number of features.
    """

    X = np.array(X)
    feature_names = np.array(feature_names)
    unique_names = np.unique(feature_names)

    nrow = X.shape[0]
    if X.shape[1] != feature_names.shape[0]:
        raise ValueError(
            "Number of features in X doesn't match feature_names.")

    # Init the reshaped X (Xstack) and the feature
    # mask, then loop over the rest
    mask = unique_names[0] == feature_names
    assert mask.shape[0] == feature_names.shape[0], ("The mask was the"
                                                     "wrong shape")
    # NOTE(review): "> 1" also rejects names owning exactly one
    # column, although the message says "empty" -- confirm intent.
    assert np.sum(mask) > 1, ("The mask was empty")

    Xstack = X[:, mask]
    for name in unique_names[1:]:
        mask = name == feature_names
        assert np.sum(mask) > 1, ("The mask was empty")

        Xname = X[:, mask]
        diff = Xstack.shape[1] - Xname.shape[1]
        if diff < 0:
            Xstack = _addcol(Xstack, np.abs(diff))
        elif diff > 0:
            # Pad the narrower new layer itself; the result used to go
            # into an unused variable, leaving the widths mismatched.
            Xname = _addcol(Xname, np.abs(diff))
        Xstack = np.vstack([Xstack, Xname])

    # Every layer contributes nrow rows, all carrying its name.
    fn_stack = []
    for name in unique_names:
        fn_stack.extend([name, ] * nrow)
    fn_stack = np.array(fn_stack)

    assert checkX(Xstack)
    assert fn_stack.shape[0] == Xstack.shape[0], (
        "After stacking X and"
        "feature_names did not match.")

    return Xstack, fn_stack
Exemplo n.º 11
def decompose_tcdf(tcdf):
    """Decompose tcdf into its parts - X, cond, dataname, index.

    Parameters
    ----------
    tcdf : DataFrame-like
        Must have the columns "index", "cond" and "dataname"; every
        other column is treated as data.

    Returns
    -------
    X : 2D array
        The remaining columns of tcdf
    cond, dataname, index : 1D arrays
        The contents of the matching columns
    """

    index = np.array(tcdf["index"].tolist())
    cond = np.array(tcdf["cond"].tolist())
    dataname = np.array(tcdf["dataname"].tolist())

    tcdf = tcdf.drop(labels=["index", "cond", "dataname"], axis=1)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is
    # available in old and new releases alike.
    X = np.array(tcdf.values)

    assert checkX(X)

    return X, cond, dataname, index
Exemplo n.º 12
def decompose_tcdf(tcdf):
    """Decompose tcdf into its parts - X, cond, dataname, index.

    Parameters
    ----------
    tcdf : DataFrame-like
        Must have the columns "index", "cond" and "dataname"; every
        other column is treated as data.

    Returns
    -------
    X : 2D array
        The remaining columns of tcdf
    cond, dataname, index : 1D arrays
        The contents of the matching columns
    """

    meta_cols = ["index", "cond", "dataname"]
    index, cond, dataname = [
        np.array(tcdf[col].tolist()) for col in meta_cols
    ]
    # as_matrix() no longer exists in pandas >= 1.0, .values works in
    # every release.
    X = np.array(tcdf.drop(labels=meta_cols, axis=1).values)

    assert checkX(X)
    return X, cond, dataname, index
Exemplo n.º 13
def restack(X, feature_names):
    """Reshape X into a stack of matrices based on feature names,
    one new 'layer' for each unique (sorted) entry in feature_names.

    In order to ensure the layers are stackable, if the number of cols
    is off, columns are added as padding (via _addcol) wherever needed.

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to restack
    feature_names : 1D array-like (n_feature, )
        The name of every column of X

    Returns
    -------
    Xstack : 2D array
        The layers stacked vertically, n_sample rows per unique name
    fn_stack : 1D array
        The layer name of every row of Xstack

    Raises
    ------
    ValueError
        If X and feature_names disagree on the number of features.
    """

    X = np.array(X)
    feature_names = np.array(feature_names)
    unique_names = np.unique(feature_names)

    nrow = X.shape[0]
    if X.shape[1] != feature_names.shape[0]:
        raise ValueError("Number of features in X doesn't match feature_names.")
    # Init the reshaped X (Xstack) and the feature
    # mask, then loop over the rest
    mask = unique_names[0] == feature_names
    assert mask.shape[0] == feature_names.shape[0], ("The mask was the"
        "wrong shape")
    # NOTE(review): "> 1" also rejects names owning exactly one
    # column, although the message says "empty" -- confirm intent.
    assert np.sum(mask) > 1, ("The mask was empty")

    Xstack = X[:,mask]
    for name in unique_names[1:]:
        mask = name == feature_names
        assert np.sum(mask) > 1, ("The mask was empty")

        Xname = X[:,mask]
        diff = Xstack.shape[1] - Xname.shape[1]
        if diff < 0:
            Xstack = _addcol(Xstack, np.abs(diff))
        elif diff > 0:
            # The padded layer must replace Xname; it was previously
            # stored in an unused name, so the vstack below saw
            # mismatched widths.
            Xname = _addcol(Xname, np.abs(diff))
        Xstack = np.vstack([Xstack, Xname])

    # Every layer contributes nrow rows, all carrying its name.
    fn_stack = []
    for name in unique_names:
        fn_stack.extend([name, ] * nrow)
    fn_stack = np.array(fn_stack)

    assert checkX(Xstack)
    assert fn_stack.shape[0] == Xstack.shape[0], ("After stacking X and"
        "feature_names did not match.")

    return Xstack, fn_stack
Exemplo n.º 14
def eva(X, y, trial_index, window, tr):
    """Average trials for each feature in X

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to decompose
    y : 1D array or None
        Sample labels for the data.  In y, np.nan and 'nan' values
        are ignored.
    trial_index : 1D array (n_sample, )
        Each unique entry should match a trial.
    window : int
        Trial length
    tr : unused
        Not read by this function; kept so the signature is unchanged.

    Returns
    -------
    Xeva : 2D array (window, n_feature * n_unique_y)
        The average trials
    eva_names : 1D array
        The names of the features (taken from y), one per column
        of Xeva
    """

    evas = []
    eva_names = []
    scaler = MinMaxScaler(feature_range=(0, 1))
    for j in range(X.shape[1]):
        Xtrials = []

        xj = X[:, j][:, np.newaxis]  ## Need 2D

        # Each feature into trials, rescale too.
        # (The builtin float replaces np.float, an alias that current
        # NumPy no longer provides.)
        Xtrial, feature_names = by_trial(xj, trial_index, window, y)
        Xtrial = scaler.fit_transform(Xtrial.astype(float))
        unique_fn = sorted(np.unique(feature_names))
        unique_fn = unique_sorted_with_nan(unique_fn)

        # and again by unique_y/feature_names
        Xlabels, _ = by_labels(X=Xtrial.transpose(), y=feature_names)

        # put all that together
        Xtrials.extend([Xl.transpose() for Xl in Xlabels])

        # and average the trials then
        # name names.
        evas.extend([Xt.mean(axis=1) for Xt in Xtrials])
        # Without the names the eva_names assertion below can never
        # hold.  NOTE(review): assumes by_labels returns its groups in
        # the same order as unique_fn -- confirm.
        eva_names.extend(unique_fn)

    # Reshape : (window, len(unique_y)*n_features)
    Xeva = np.vstack(evas).transpose()
    eva_names = np.asarray(eva_names)

    assert checkX(Xeva)
    assert Xeva.shape[0] == window, ("After EVA rows not equal to window")
    assert Xeva.shape[1] == len(unique_fn) * X.shape[1], (
        "EVA wrong number of features")
    assert eva_names.shape[0] == Xeva.shape[1], ("eva_names and Xeva"
                                                 "don't match")

    return Xeva, eva_names
Exemplo n.º 15
def fir(X, y, trial_index, window, tr):
    """ Average trials for each feature in X, using Burock's
    (2000) method.

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to decompose
    y : 1D array
        Sample labels for the data. In y, np.nan and 'nan' values
        are treated as baseline labels.
    trial_index : 1D array (n_sample, )
        Each unique entry should match a trial.  Not read by this
        function.
    window : int
        Trial length
    tr : unused
        Not read by this function; kept so the signature is unchanged.

    Returns
    -------
    Xfir : 2D array (window, n_feature * (n_unique_y - 1))
        The average trials
    fir_names : 1D array
        The label behind every column of Xfir
    """

    # Norm then pad.
    # (The builtin float replaces np.float, an alias that current
    # NumPy no longer provides.)
    scaler = MinMaxScaler(feature_range=(0, 1))
    X = scaler.fit_transform(X.astype(float))
    X = np.vstack([X, np.ones((window, X.shape[1]), dtype=float)])

    # Save the org y names
    ynames = sorted(np.unique(y))
    ynames = unique_sorted_with_nan(ynames)

    # y becomes integers
    y = create_y(y)

    # Make the design matrix.
    dm = np.asarray(_create_dm(y, window))

    # pinv(dm' dm) dm' depends on the design only, so it is computed
    # once instead of once per feature.
    projector = np.dot(np.linalg.pinv(np.dot(dm.T, dm)), dm.T)

    # FIR!
    fir_names = []
    firs = []
    for j in range(X.shape[1]):
        betas = np.dot(projector, X[:, j])[0:-1]
        ## Drop dummy
        fir_j = betas.reshape(len(ynames) - 1, window)

        # Keep the estimate; firs used to stay empty, which made the
        # vstack below fail.
        firs.append(fir_j)
        fir_names.extend(ynames[1:])  ## Drop nan/baseline

    Xfir = np.vstack(firs).transpose()
    fir_names = np.asarray(fir_names)

    assert checkX(Xfir)
    assert Xfir.shape[0] == window, ("After FIR rows not equal to window")
    assert Xfir.shape[1] == (len(ynames[1:]) *
                             X.shape[1]), ("After"
                                           "FIR wrong number of features")
    assert fir_names.shape[0] == Xfir.shape[1], ("fir_names and Xfir"
                                                 "don't match")

    return Xfir, fir_names
Exemplo n.º 16
        # NOTE(review): the opening of this call is not visible in this
        # excerpt (presumably `Xtrain, ytrain = _data(`) and the closing
        # parentheses of several calls below are missing too -- confirm
        # against the full script.
        # Each "a:b[:c]" command line string is split on ':' and
        # unpacked into range().
        range(*[int(i) for i in args.train_data.split(':')]),
        range(*[int(i) for i in args.train_window.split(':')]),
Xtest, ytest = _data(
        range(*[int(i) for i in args.test_data.split(':')]),
        range(*[int(i) for i in args.test_window.split(':')]),
# Stack train on top of test; cvcode marks train rows with 0 and
# test rows with 1.
X = np.vstack([Xtrain, Xtest])
y = np.concatenate([ytrain, ytest])
cvcode = np.asarray([0]*ytrain.shape[0] + [1]*ytest.shape[0])
assert checkX(X)
assert X.shape[0] == y.shape[0], "X and y length mismatch"
assert X.shape[0] == cvcode.shape[0], "X and cvcode length mismatch"

# CV
# NOTE(review): the (labels, n_folds=, indices=) call looks like the
# legacy scikit-learn StratifiedKFold signature -- confirm against the
# installed version.
cv = StratifiedKFold(cvcode, n_folds=2, indices=True)

# Classifier, selected by name from the command line
if args.clf == "RandomForestClassifier":
    clf = RandomForestClassifier(
            n_estimators=500, max_features=None
elif args.clf == "GradientBoostingClassifier":   
    clf = GradientBoostingClassifier(
            n_estimators=100, learning_rate=1.0, 
            max_depth=1, random_state=prng
Exemplo n.º 17
def fir(X, y, trial_index, window, tr):
    """ Average trials for each feature in X, using Burock's
    (2000) method.

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to decompose
    y : 1D array
        Sample labels for the data. In y, np.nan and 'nan' values
        are treated as baseline labels.
    trial_index : 1D array (n_sample, )
        Each unique entry should match a trial.  Not read by this
        function.
    window : int
        Trial length
    tr : unused
        Not read by this function; kept so the signature is unchanged.

    Returns
    -------
    Xfir : 2D array (window, n_feature * (n_unique_y - 1))
        The average trials
    fir_names : 1D array
        The label behind every column of Xfir
    """

    # Norm then pad.
    # (The builtin float replaces np.float, an alias that current
    # NumPy no longer provides.)
    scaler = MinMaxScaler(feature_range=(0, 1))
    X = scaler.fit_transform(X.astype(float))
    X = np.vstack([X, np.ones((window, X.shape[1]), dtype=float)])

    # Save the org y names
    ynames = sorted(np.unique(y))
    ynames = unique_sorted_with_nan(ynames)
    # y becomes integers
    y = create_y(y)

    # Make the design matrix.
    dm = np.asarray(_create_dm(y, window))

    # The estimator pinv(dm' dm) dm' is the same for every feature,
    # so it is built a single time outside the loop.
    estimator = np.dot(np.linalg.pinv(np.dot(dm.T, dm)), dm.T)
    n_cond = len(ynames) - 1

    # FIR!
    fir_names = []
    firs = []
    for j in range(X.shape[1]):
        weights = np.dot(estimator, X[:,j])[0:-1]
            ## Drop dummy
        # Store the reshaped estimate; nothing was ever added to firs
        # before, so the vstack below had nothing to stack.
        firs.append(weights.reshape(n_cond, window))

        fir_names.extend(ynames[1:])  ## Drop nan/baseline

    Xfir = np.vstack(firs).transpose()
    fir_names = np.asarray(fir_names)

    assert checkX(Xfir)
    assert Xfir.shape[0] == window, ("After FIR rows not equal to window")
    assert Xfir.shape[1] == (len(ynames[1:]) * X.shape[1]), ("After"
        "FIR wrong number of features")
    assert fir_names.shape[0] == Xfir.shape[1], ("fir_names and Xfir"
        "don't match")

    return Xfir, fir_names