def constructFromFile(self, fileName, **args):
    """Build the pair dataset from a labels file and a base data object.

    Every pattern ID in the labels built from fileName is split on '_' into
    two IDs.  A pair is kept only when both IDs appear among the pattern IDs
    of the base data object; otherwise a message is printed and the pair is
    skipped.

    INPUT:
    fileName - handed to Labels() to obtain the pair labels
    data - (required keyword argument) the underlying data object; it is
      stored as self._data and its labels.patternID are used for the lookup

    Raises ValueError when the 'data' keyword is missing, or (from the tuple
    unpacking) when a pattern ID does not split into exactly two parts.
    """
    if 'data' not in args:
        raise ValueError('missing data object')
    self._data = args['data']

    # map each pattern ID of the underlying data to its position
    patternIDdict = misc.list2dict(self._data.labels.patternID,
                                   range(len(self._data)))

    labels = Labels(fileName)
    patterns = []
    pairs = []
    for i in range(len(labels)):
        p1, p2 = labels.patternID[i].split('_')
        # add only pairs for which we have kernel data:
        if p1 in patternIDdict and p2 in patternIDdict:
            pairs.append((patternIDdict[p1], patternIDdict[p2]))
            patterns.append(i)
        else:
            # same text the old print statement produced; this form works
            # whether print is a statement or a function
            print('%s  or  %s not found' % (p1, p2))

    # restrict the labels to the pairs that were actually kept
    labels = labels.__class__(labels, patterns=patterns)
    self.pairs = pairs

    first = [pair[0] for pair in pairs]
    second = [pair[1] for pair in pairs]
    self.callConstructor(arrayWrap.intVector(first),
                         arrayWrap.intVector(second))
    WrapperDataSet.attachLabels(self, labels)
def constructFromFile(self, fileName):
    """Build the pair dataset from a labels file.

    Every pattern ID in the labels built from fileName is split on '_' into
    two IDs.  A pair is kept only when both IDs appear among the pattern IDs
    of self._data; otherwise a message is printed and the pair is skipped.
    self._data is read here but not assigned, so it must already be set.

    INPUT:
    fileName - handed to Labels() to obtain the pair labels

    Raises ValueError (from the tuple unpacking) when a pattern ID does not
    split into exactly two parts.
    """
    # map each pattern ID of the underlying data to its position
    patternIDdict = misc.list2dict(self._data.labels.patternID,
                                   range(len(self._data)))

    labels = Labels(fileName)
    patterns = []
    pairs = []
    for i in range(len(labels)):
        p1, p2 = labels.patternID[i].split('_')
        # add only pairs for which we have kernel data:
        if p1 in patternIDdict and p2 in patternIDdict:
            pairs.append((patternIDdict[p1], patternIDdict[p2]))
            patterns.append(i)
        else:
            # same text the old print statement produced; this form works
            # whether print is a statement or a function
            print('%s  or  %s not found' % (p1, p2))

    # restrict the labels to the pairs that were actually kept
    labels = labels.__class__(labels, patterns=patterns)
    self.pairs = pairs

    first = [pair[0] for pair in pairs]
    second = [pair[1] for pair in pairs]
    self.callConstructor(arrayWrap.intVector(first),
                         arrayWrap.intVector(second))
    WrapperDataSet.attachLabels(self, labels)
def constructFromFile(self, fileName):
    """Construct the pair dataset from the labels found via fileName.

    Each label's pattern ID is expected to look like "id1_id2".  Both halves
    are looked up among the pattern IDs of self._data (which this method
    reads but never assigns, so it has to be set beforehand).  Pairs with an
    unknown half are reported with a printed message and left out.

    INPUT:
    fileName - handed to Labels() to obtain the pair labels

    Raises ValueError (from the tuple unpacking) when a pattern ID does not
    split into exactly two parts.
    """
    # pattern ID of the underlying data -> its position
    patternIDdict = misc.list2dict(self._data.labels.patternID,
                                   range(len(self._data)))

    labels = Labels(fileName)
    patterns = []
    pairs = []
    for i in range(len(labels)):
        p1, p2 = labels.patternID[i].split("_")
        # add only pairs for which we have kernel data:
        if p1 in patternIDdict and p2 in patternIDdict:
            pairs.append((patternIDdict[p1], patternIDdict[p2]))
            patterns.append(i)
        else:
            # identical output to the former print statement, but valid
            # whether print is a statement or a function
            print("%s  or  %s not found" % (p1, p2))

    # keep only the labels of the pairs that were retained
    labels = labels.__class__(labels, patterns=patterns)
    self.pairs = pairs

    first = [pair[0] for pair in pairs]
    second = [pair[1] for pair in pairs]
    self.callConstructor(arrayWrap.intVector(first),
                         arrayWrap.intVector(second))
    WrapperDataSet.attachLabels(self, labels)
def mean(self, patterns=None):
    """Delegate to the container's mean() for the selected patterns.

    INPUT:
    patterns - indices of the patterns to use, each between 0 and
      len(self)-1; any iterable is accepted.  Defaults to all patterns.

    Returns whatever self.container.mean returns.
    Raises ValueError when an index is out of range (an empty selection
    also raises ValueError, coming from min()).
    """
    if patterns is None:
        patterns = range(len(self))
    # normalise to a list, as nonzero/featureCounts do, so that iterators
    # survive the min/max validation below
    if not isinstance(patterns, list):
        patterns = list(patterns)
    if min(patterns) < 0 or max(patterns) >= len(self):
        raise ValueError('Pattern index out of range')
    cpatterns = arrayWrap.intVector(patterns)
    return self.container.mean(self, cpatterns)
def mean(self, patterns=None):
    """Call the container's mean() on a validated list of pattern indices.

    INPUT:
    patterns - iterable of pattern indices in the range 0..len(self)-1;
      when omitted, every pattern is used.

    Returns the result of self.container.mean.
    Raises ValueError for an out-of-range index (an empty selection also
    ends in a ValueError, raised by min()).
    """
    if patterns is None:
        patterns = range(len(self))
    # same normalisation as the sibling methods: a real list can be scanned
    # twice (min, then max) even if the caller passed an iterator
    if not isinstance(patterns, list):
        patterns = list(patterns)
    if min(patterns) < 0 or max(patterns) >= len(self):
        raise ValueError('Pattern index out of range')
    cpatterns = arrayWrap.intVector(patterns)
    return self.container.mean(self, cpatterns)
def nonzero(self, feature, patterns=None):
    """Delegate to the container's nonzero() for one feature.

    INPUT:
    feature - passed through unchanged to self.container.nonzero
    patterns - indices of the patterns to consider, each between 0 and
      len(self)-1; any iterable is accepted.  Defaults to all patterns.

    Returns whatever self.container.nonzero returns.
    Raises ValueError when an index is out of range (an empty selection
    also raises ValueError, coming from min()).
    """
    if patterns is None:
        patterns = range(len(self))
    if not isinstance(patterns, list):
        patterns = list(patterns)
    if min(patterns) < 0 or max(patterns) >= len(self):
        raise ValueError('Pattern index goes outside of range')
    cpatterns = arrayWrap.intVector(patterns)
    return self.container.nonzero(self, feature, cpatterns)
def featureCounts(self, patterns=None):
    """Delegate to the container's featureCounts() for the selected patterns.

    INPUT:
    patterns - indices of the patterns to consider, each between 0 and
      len(self)-1; any iterable is accepted.  Defaults to all patterns.

    Returns whatever self.container.featureCounts returns.
    Raises ValueError when an index is out of range (an empty selection
    also raises ValueError, coming from min()).
    """
    if patterns is None:
        patterns = range(len(self))
    if not isinstance(patterns, list):
        patterns = list(patterns)
    if min(patterns) < 0 or max(patterns) >= len(self):
        raise ValueError('Pattern index out of range')
    cpatterns = arrayWrap.intVector(patterns)
    return self.container.featureCounts(self, cpatterns)
def nonzero(self, feature, patterns=None):
    """Call the container's nonzero() for a feature over validated patterns.

    INPUT:
    feature - forwarded as-is to self.container.nonzero
    patterns - iterable of pattern indices in the range 0..len(self)-1;
      when omitted, every pattern is used.

    Returns the result of self.container.nonzero.
    Raises ValueError for an out-of-range index (an empty selection also
    ends in a ValueError, raised by min()).
    """
    if patterns is None:
        patterns = range(len(self))
    if not isinstance(patterns, list):
        patterns = list(patterns)
    if min(patterns) < 0 or max(patterns) >= len(self):
        raise ValueError('Pattern index goes outside of range')
    cpatterns = arrayWrap.intVector(patterns)
    return self.container.nonzero(self, feature, cpatterns)
def featureCounts(self, patterns=None):
    """Call the container's featureCounts() over validated pattern indices.

    INPUT:
    patterns - iterable of pattern indices in the range 0..len(self)-1;
      when omitted, every pattern is used.

    Returns the result of self.container.featureCounts.
    Raises ValueError for an out-of-range index (an empty selection also
    ends in a ValueError, raised by min()).
    """
    if patterns is None:
        patterns = range(len(self))
    if not isinstance(patterns, list):
        patterns = list(patterns)
    if min(patterns) < 0 or max(patterns) >= len(self):
        raise ValueError('Pattern index out of range')
    cpatterns = arrayWrap.intVector(patterns)
    return self.container.featureCounts(self, cpatterns)
def eliminateFeatures(self, featureList):
    """eliminate a list of features from a dataset

    INPUT:
    featureList - a list of features to eliminate; these are numbers
      between 0 and numFeatures-1 (indices of features, not their IDs).
      If the first element is a string, the whole list is first translated
      with self.featureNames2IDs.  Any indexable sequence is accepted; a
      list argument is sorted in place.

    An empty featureList is a no-op.
    Raises ValueError when an index is outside 0..numFeatures-1.
    """
    if len(featureList) == 0:
        return
    if isinstance(featureList[0], str):
        featureList = self.featureNames2IDs(featureList)
    # convert before sorting: tuples and other sequences have no .sort()
    if not isinstance(featureList, list):
        featureList = list(featureList)
    featureList.sort()
    if max(featureList) >= self.numFeatures or min(featureList) < 0:
        raise ValueError('Bad feature list')
    cfeatureList = arrayWrap.intVector(featureList)
    self.container.eliminateFeatures(self, cfeatureList)
    self.updateFeatureDict(featureList)
def eliminateFeatures(self, featureList):
    """eliminate a list of features from a dataset

    INPUT:
    featureList - a list of features to eliminate; these are numbers
      between 0 and numFeatures-1 (indices of features, not their IDs).
      When the first element is a string, the list is translated through
      self.featureNames2IDs first.  Tuples and other indexable sequences
      are accepted as well; a list argument is sorted in place.

    Does nothing when featureList is empty.
    Raises ValueError when an index is outside 0..numFeatures-1.
    """
    if len(featureList) == 0:
        return
    if isinstance(featureList[0], str):
        featureList = self.featureNames2IDs(featureList)
    # make it a list first; sorting a tuple directly would raise
    # AttributeError
    if not isinstance(featureList, list):
        featureList = list(featureList)
    featureList.sort()
    if max(featureList) >= self.numFeatures or min(featureList) < 0:
        raise ValueError('Bad feature list')
    cfeatureList = arrayWrap.intVector(featureList)
    self.container.eliminateFeatures(self, cfeatureList)
    self.updateFeatureDict(featureList)