Example #1
    def loadTest(self, y, yMatch):
        """
        Method to load the test data into the object.  We might be interested
        in loading new test data, so we have explicitly defined
        this as a method.
        
        Parameters:
        - - - - -
            y : SubjectFeatures object for a test brain      
            
            yMatch : MatchingFeaturesTest object containing vertLib attribute 
                    detailing which labels each vertex in surface y maps to 
                    in the training data

        """

        load = self.load
        save = self.save

        features = self.features

        # load test subject data, save as attributes
        tObject = ld.loadH5(y, 'full')
        ID = tObject.attrs['ID']

        parsedData = ld.parseH5(tObject, features)
        tObject.close()

        data = parsedData[ID]
        mtd = cu.mergeFeatures(data, features)
        print('Testing shape: {}'.format(mtd.shape))

        if self.scaled:
            scaler = self.scaler
            mtd = scaler.transform(mtd)

        threshed = ld.loadMat(yMatch)

        # Computing label-vertex memberships is time consuming.  If they were
        # already precomputed for the given test data at the specified
        # threshold, a path to the saved file can be supplied via load.
        if load and os.path.isfile(load):
            ltvm = ld.loadPick(load)
        # Otherwise, compute label-vertex memberships from scratch.
        else:
            ltvm = cu.vertexMemberships(threshed, 180)

        self.ltvm = ltvm

        # if save is provided, save label-vertex memberships to file
        if save:
            try:
                with open(save, "wb") as outFile:
                    pickle.dump(self.ltvm, outFile, -1)
            except IOError:
                print('Cannot save label-vertex memberships to file.')

        return [threshed, mtd, ltvm]
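
The caching behavior above depends on instance attributes (load, save, features, scaled, scaler) configured elsewhere on the owning class, which is not shown in this snippet. A minimal usage sketch, where `model`, the attribute values, and the file paths are all hypothetical:

# `model` stands in for an already-constructed instance of the owning class
model.load = 'subject1.L.ltvm.p'    # pickle from a previous run, if it exists
model.save = 'subject1.L.ltvm.p'    # where to cache the vertexMemberships output

threshed, mtd, ltvm = model.loadTest('subject1.L.TestObject.h5',
                                     'subject1.L.Matching.mat')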
Example #2
    def loadTest(self, y, yMatch):
        """
        Method to load the test data into the object.  We might be interested
        in loading new test data, so we have explicitly defined this is
        as a method.
        
        Parameters:
        - - - - -
            y : SubjectFeatures object for a test brain      
            
            yMatch : MatchingFeaturesTest object containing vertLib attribute 
                    detailing which labels each vertex in surface y maps to 
                    in the training data

        """

        # load test subject data, save as attributes
        tObject = ld.loadH5(y, 'full')
        ID = tObject.attrs['ID']

        parsedData = ld.parseH5(tObject, self.features)
        tObject.close()

        data = parsedData[ID]
        mtd = cu.mergeFeatures(data, self.features)
        print('Testing shape: {}'.format(mtd.shape))

        if self.scaled:
            scaler = self.scaler
            mtd = scaler.transform(mtd)

        threshed = ld.loadMat(yMatch)

        ltvm = cu.vertexMemberships(threshed, 180)

        return [threshed, mtd, ltvm]
Example #3
def loadData(subjectList, dataMap, features, hemi):
    """
    Generates the training data from a list of subjects.
    
    Parameters:
    - - - - -
        subjectList : list of subjects to include in training set
        dataMap : dictionary mapping each data type ('object', 'midline',
                    'matching') to a single {directory: extension} pair
        features : list of features to include
        hemi : hemisphere to process
    """

    # each dataMap entry holds a single {directory: extension} pair
    objDir, objExt = list(dataMap['object'].items())[0]
    midDir, midExt = list(dataMap['midline'].items())[0]
    matDir, matExt = list(dataMap['matching'].items())[0]

    data = {}
    matches = {}

    for s in subjectList:

        # Training data
        trainObject = '{}{}.{}.{}'.format(objDir, s, hemi, objExt)
        print(trainObject)
        midObject = '{}{}.{}.{}'.format(midDir, s, hemi, midExt)
        matObject = '{}{}.{}.{}'.format(matDir, s, hemi, matExt)

        # Check to make sure all 3 files exist
        if os.path.isfile(trainObject) and os.path.isfile(
                midObject) and os.path.isfile(matObject):

            # Load midline indices
            # Subtract 1 for difference between Matlab and Python indexing
            mids = ld.loadMat(midObject) - 1
            mids = set(mids)

            match = ld.loadMat(matObject)

            # Load training data and training labels
            trainH5 = h5py.File(trainObject, mode='r')

            # Get data corresponding to features of interest
            subjData = ld.parseH5(trainH5, features)
            trainH5.close()

            nSamples = set(np.arange(subjData[s][features[0]].shape[0]))
            coords = np.asarray(list(nSamples.difference(mids)))

            for f in subjData[s].keys():
                tempData = subjData[s][f]
                if tempData.ndim == 1:
                    tempData.shape += (1, )

                subjData[s][f] = np.squeeze(tempData[coords, :])

            match = match[coords, :]

            data[s] = subjData[s]
            matches[s] = match

    return [data, matches]
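
loadData expects each entry of dataMap to hold exactly one {directory: extension} pair and assembles file names as '<dir><subject>.<hemi>.<ext>'. A sketch of building the argument; the directories, extensions, subject IDs, and feature names below are hypothetical:

dataMap = {
    'object': {'/data/TrainingObjects/': 'TrainingObject.h5'},
    'midline': {'/data/Midlines/': 'Midline_Indices.mat'},
    'matching': {'/data/Matching/': 'MatchingMatrix.mat'},
}

subjects = ['100307', '100408']
features = ['curv', 'sulc', 'label']
data, matches = loadData(subjects, dataMap, features, 'L')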
Example #4
def loadDataFromList(subjectList, dataDir, features, hemi):
    """
    Generates the training data for the neural network.
    
    Parameters:
    - - - - -
        subjectList : list of subjects to include in training set
        dataDir : main directory where data exists -- individual features
                    will exist in sub-directories here
        features : list of features to include
        hemi : hemisphere to process
    """

    hemisphere = {'Left': 'L', 'Right': 'R'}

    H = hemisphere[hemi]

    # For now, we hardcode where the data is
    trainDir = '{}TrainingObjects/FreeSurfer/'.format(dataDir)
    trainExt = '.{}.TrainingObject.aparc.a2009s.h5'.format(H)

    midDir = '{}Midlines/'.format(dataDir)
    midExt = '.{}.Midline_Indices.mat'.format(H)

    data = {}

    for s in subjectList:

        # Training data
        trainObject = '{}{}{}'.format(trainDir, s, trainExt)
        midObject = '{}{}{}'.format(midDir, s, midExt)

        # Check to make sure both files exist
        if os.path.isfile(trainObject) and os.path.isfile(midObject):

            # Load midline indices
            # Subtract 1 for difference between Matlab and Python indexing
            mids = ld.loadMat(midObject) - 1
            mids = set(mids)

            # Load training data and training labels
            trainH5 = h5py.File(trainObject, mode='r')

            # Get data corresponding to features of interest
            subjData = ld.parseH5(trainH5, features)
            trainH5.close()

            nSamples = set(np.arange(subjData[s][features[0]].shape[0]))
            coords = np.asarray(list(nSamples.difference(mids)))

            for f in subjData[s].keys():
                tempData = subjData[s][f]
                if tempData.ndim == 1:
                    tempData.shape += (1, )

                subjData[s][f] = np.squeeze(tempData[coords, :])

            data[s] = subjData[s]

    return data
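
Unlike loadData, loadDataFromList hardcodes the FreeSurfer directory layout under dataDir and expects the full hemisphere name, which it maps to the 'L'/'R' file infix. A sketch of a call with hypothetical values:

subjects = ['100307', '100408']
features = ['curv', 'sulc']
# hemi must be 'Left' or 'Right'; dataDir needs its trailing slash because
# the paths are built by plain string concatenation
data = loadDataFromList(subjects, '/data/HCP/', features, 'Left')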
Example #5
    def loadTraining(self, trainObject, dataDir, hemisphere, features):
        """
        Parameters:
        - - - - -
            trainObject : input training data (path to a file, an open h5py
                          File, a dictionary, or a list of subject IDs)
            
            dataDir : main data directory (used when trainObject is a list)
            
            hemisphere : hemisphere to process ('Left' or 'Right')
            
            features : list of features to include in the model
        """

        # check feature value
        if not features or not isinstance(features, list):
            raise ValueError('Features must be a non-empty list.')
        else:
            self.features = features

        # load the training data
        loadingFeatures = copy.copy(features)
        loadingFeatures.append('label')

        if isinstance(trainObject, str):
            trainData = ld.loadH5(trainObject, 'full')
        elif isinstance(trainObject, (h5py.File, dict)):
            trainData = trainObject
        elif isinstance(trainObject, list):
            trainData = loadDataFromList(trainObject, dataDir, loadingFeatures,
                                         hemisphere)
        else:
            raise ValueError('Training object is of incorrect type.')

        if not trainData:
            raise ValueError('Training data cannot be empty.')

        if isinstance(trainData, h5py.File):
            parseFeatures = copy.deepcopy(self.features)
            parseFeatures.append('label')

            parsedData = ld.parseH5(trainData, parseFeatures)
            trainData.close()
            trainData = parsedData

        # get subject IDs in training data
        subjects = list(trainData.keys())

        # if exclude_testing is set, exclude the data for these subjects
        # when fitting the models
        if self.exclude_testing:
            subjects = list(
                set(subjects).difference(set(self.exclude_testing)))

        # if random is set, select random subset of size random from viable training subjects
        if not self.random:
            randomSample = len(subjects)
        else:
            randomSample = min(self.random, len(subjects))

        sample = np.random.choice(subjects, size=randomSample, replace=False)
        trainData = {s: trainData[s] for s in sample}

        training = []
        labels = []

        print('Model features: {}'.format(features))

        # exclude the label column from the feature set
        nf = [f for f in self.features if f != 'label']

        for subj in trainData.keys():
            training.append(cu.mergeFeatures(trainData[subj], nf))
            labels.append(cu.mergeFeatures(trainData[subj], ['label']))

        trainData = np.squeeze(np.vstack(training))
        labelVector = np.squeeze(np.concatenate(labels))
        self.labels = set(labelVector).difference({0, -1})

        if self.scale:

            scaler = preprocessing.StandardScaler(with_mean=True,
                                                  with_std=True)
            trainData = scaler.fit_transform(trainData)
            self.scaler = scaler
            self.scaled = True

        # isolate training data corresponding to each label
        labelData = cu.partitionData(trainData, labelVector, self.labels)
        response = cu.buildResponseVector(self.labels, labelData)

        self.input_dim = labelData[next(iter(labelData))].shape[1]

        # check quality of training data to ensure all features have same length,
        # all response vectors have the same number of samples, and that all training data
        # has the same features
        cond = True
        if not compareTrainingDataKeys(labelData, response):
            print(
                'WARNING: Label data and label response do not have same keys.'
            )
            cond = False

        if not compareTrainingDataSize(labelData, response):
            print('WARNING: Label data and label response are not same shape.')
            cond = False

        if not cond:
            raise ValueError('Training data is flawed.')

        return [labelData, response]
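
A sketch of driving loadTraining end to end, assuming `model` is an instance of the (unshown) owning class with exclude_testing, random, and scale already set; the subject IDs, path, and feature names are hypothetical:

features = ['curv', 'sulc']
subjects = ['100307', '100408', '100610']

# passing a list routes loading through loadDataFromList, so dataDir and
# hemisphere must match its hardcoded FreeSurfer layout
labelData, response = model.loadTraining(subjects, '/data/HCP/', 'Left', features)

# labelData maps each label to its training samples; response holds the
# matching per-label response vectors, ready for per-label model fitting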