def constructFromFile(self, fileName, **args):
    """Initialize this object from a kernel matrix stored in a delimited text file.

    Every data line of the file contributes one row to a
    ``ckernel.KernelMatrix``.  The delimiter is obtained from
    ``misc.getDelim(fileName)``.

    Lines that are skipped:
      * lines starting with ``%`` or ``#`` (comments);
      * the first non-comment line, when its last token cannot be parsed
        as a float, or when the ``headerRow`` or ``gistFormat`` keyword
        is true (header row);
      * blank lines after the first row.

    Keyword arguments read here:
      idColumn   -- defaults to 0.  When it is None, every token of a row
                    is parsed as a matrix value.  For any other value the
                    first token of each row is recorded as the pattern ID
                    and the values start at the second token (the actual
                    number given is not used as an index).
      headerRow, gistFormat -- when either is true the first non-comment
                    line is always skipped.
      labelsFile -- when present, labels are created with
                    ``labels.Labels(args['labelsFile'], **args)``;
                    otherwise with ``labels.Labels(None,
                    patternID=...)`` using the IDs collected from the file.
    """
    matrix = ckernel.KernelMatrix()
    # NOTE(review): presumably releases Python-side ownership of the
    # wrapped object (SWIG convention) -- confirm against ckernel.
    matrix.thisown = 0

    idColumn = args.get('idColumn', 0)
    hasIdColumn = idColumn is not None
    if hasIdColumn:
        firstColumn = 1
    else:
        firstColumn = 0
    skipFirstRow = bool(args.get('headerRow') or args.get('gistFormat'))

    delim = misc.getDelim(fileName)
    patternID = []

    matrixFile = myio.myopen(fileName)
    try:
        firstRow = True
        for line in matrixFile:
            # skip comments:
            if line.startswith(("%", "#")):
                continue
            tokens = line.split(delim)
            if firstRow:
                firstRow = False
                # A first row whose last field is not numeric is a header
                # (e.g. gist format); IndexError covers an empty first row.
                try:
                    float(tokens[-1])
                except (ValueError, IndexError):
                    continue
                if skipFirstRow:
                    continue
            # A blank line carries no matrix row; skip it instead of
            # failing on float('') or tokens[0].
            if not line.strip():
                continue
            values = arrayWrap.floatVector(
                [float(token) for token in tokens[firstColumn:]])
            matrix.addRow(values)
            if hasIdColumn:
                patternID.append(tokens[0])
    finally:
        # The handle is only needed while reading; release it even when a
        # row fails to parse.
        matrixFile.close()

    ckerneldata.KernelData.__init__(self, matrix)
    if 'labelsFile' in args:
        self.attachLabels(labels.Labels(args['labelsFile'], **args))
    else:
        self.attachLabels(labels.Labels(None, patternID=patternID))
def load_libsvm_format(file_name, **args):
    """
    Load a dataset from a file in libsvm format.

    Returns an instance of PyVectorDataSet with the labels attached.
    If you want to use the data with a SparseDataSet, you can directly
    do it using the SparseDataSet constructor.

    Each non-blank line is expected to look like
    ``<label> <index>:<value> <index>:<value> ...`` with 1-based feature
    indices.  Blank lines are ignored.

    Keyword arguments:
      regression -- defaults to False.  When true the labels are kept as
                    floats and wrapped with ``Labels(..., numericLabels=True)``;
                    otherwise each label is normalized to the string form
                    of its integer value (e.g. ``"1.0"`` becomes ``"1"``).

    Raises ValueError if file_name does not exist.
    """
    regression = args.get('regression', False)

    if not os.path.exists(file_name):
        raise ValueError("file doesn't exist at %s" % file_name)

    # first pass: extract labels and check how many features there are
    labels = []
    num_features = 0
    file_handle = myio.myopen(file_name)
    try:
        for line in file_handle:
            tokens = line.split()
            if not tokens:
                continue
            if regression:
                labels.append(float(tokens[0]))
            else:
                labels.append(str(int(float(tokens[0]))))
            for token in tokens[1:]:
                index, value = token.split(':')
                num_features = max(num_features, int(index))
    finally:
        file_handle.close()

    # builtin float is the dtype the removed numpy.float alias stood for
    X = numpy.zeros((len(labels), num_features), float)

    # second pass: fill in the array.  The file is opened the same way as
    # in the first pass so both passes see the same lines.
    file_handle = myio.myopen(file_name)
    try:
        row = 0
        for line in file_handle:
            tokens = line.split()
            if not tokens:
                # must mirror the first pass so rows stay aligned with labels
                continue
            for token in tokens[1:]:
                index, value = token.split(':')
                # libsvm indices are 1-based; array columns are 0-based
                X[row, int(index) - 1] = float(value)
            row += 1
    finally:
        file_handle.close()

    data = PyVectorDataSet(X)
    if regression:
        labels = Labels(labels, numericLabels=True)
    else:
        labels = Labels(labels)
    data.attachLabels(labels)

    return data
def load_libsvm_format(file_name, **args):
    """
    Load a dataset from a file in libsvm format.

    Returns an instance of PyVectorDataSet with the labels attached.
    If you want to use the data with a SparseDataSet, you can directly
    do it using the SparseDataSet constructor.

    Each non-blank line is expected to look like
    ``<label> <index>:<value> <index>:<value> ...`` with 1-based feature
    indices.  Blank lines are ignored.

    Keyword arguments:
      regression -- defaults to False.  When true the labels are kept as
                    floats and wrapped with ``Labels(..., numericLabels=True)``;
                    otherwise each label is normalized to the string form
                    of its integer value (e.g. ``"1.0"`` becomes ``"1"``).

    Raises ValueError if file_name does not exist.
    """
    regression = args.get('regression', False)

    if not os.path.exists(file_name):
        raise ValueError("file doesn't exist at %s" % file_name)

    # first pass: extract labels and check how many features there are
    labels = []
    num_features = 0
    file_handle = myio.myopen(file_name)
    try:
        for line in file_handle:
            tokens = line.split()
            if not tokens:
                continue
            if regression:
                labels.append(float(tokens[0]))
            else:
                labels.append(str(int(float(tokens[0]))))
            for token in tokens[1:]:
                index, value = token.split(':')
                num_features = max(num_features, int(index))
    finally:
        file_handle.close()

    # builtin float is the dtype the removed numpy.float alias stood for
    X = numpy.zeros((len(labels), num_features), float)

    # second pass: fill in the array.  The file is opened the same way as
    # in the first pass so both passes see the same lines.
    file_handle = myio.myopen(file_name)
    try:
        row = 0
        for line in file_handle:
            tokens = line.split()
            if not tokens:
                # must mirror the first pass so rows stay aligned with labels
                continue
            for token in tokens[1:]:
                index, value = token.split(':')
                # libsvm indices are 1-based; array columns are 0-based
                X[row, int(index) - 1] = float(value)
            row += 1
    finally:
        file_handle.close()

    data = PyVectorDataSet(X)
    if regression:
        labels = Labels(labels, numericLabels=True)
    else:
        labels = Labels(labels)
    data.attachLabels(labels)

    return data
def constructFromFile(self, fileName, **args):
    """Initialize this object from a kernel matrix stored in a delimited text file.

    Every data line of the file contributes one row to a
    ``ckernel.KernelMatrix``.  The delimiter is obtained from
    ``misc.getDelim(fileName)``.

    Lines that are skipped:
      * lines starting with ``%`` or ``#`` (comments);
      * the first non-comment line, when its last token cannot be parsed
        as a float, or when the ``headerRow`` or ``gistFormat`` keyword
        is true (header row);
      * blank lines after the first row.

    Keyword arguments read here:
      idColumn   -- defaults to 0.  When it is None, every token of a row
                    is parsed as a matrix value.  For any other value the
                    first token of each row is recorded as the pattern ID
                    and the values start at the second token (the actual
                    number given is not used as an index).
      headerRow, gistFormat -- when either is true the first non-comment
                    line is always skipped.
      labelsFile -- when present, labels are created with
                    ``labels.Labels(args["labelsFile"], **args)``;
                    otherwise with ``labels.Labels(None,
                    patternID=...)`` using the IDs collected from the file.
    """
    matrix = ckernel.KernelMatrix()
    # NOTE(review): presumably releases Python-side ownership of the
    # wrapped object (SWIG convention) -- confirm against ckernel.
    matrix.thisown = 0

    idColumn = args.get("idColumn", 0)
    hasIdColumn = idColumn is not None
    if hasIdColumn:
        firstColumn = 1
    else:
        firstColumn = 0
    skipFirstRow = bool(args.get("headerRow") or args.get("gistFormat"))

    delim = misc.getDelim(fileName)
    patternID = []

    matrixFile = myio.myopen(fileName)
    try:
        firstRow = True
        for line in matrixFile:
            # skip comments:
            if line.startswith(("%", "#")):
                continue
            tokens = line.split(delim)
            if firstRow:
                firstRow = False
                # A first row whose last field is not numeric is a header
                # (e.g. gist format); IndexError covers an empty first row.
                try:
                    float(tokens[-1])
                except (ValueError, IndexError):
                    continue
                if skipFirstRow:
                    continue
            # A blank line carries no matrix row; skip it instead of
            # failing on float('') or tokens[0].
            if not line.strip():
                continue
            values = arrayWrap.floatVector(
                [float(token) for token in tokens[firstColumn:]])
            matrix.addRow(values)
            if hasIdColumn:
                patternID.append(tokens[0])
    finally:
        # The handle is only needed while reading; release it even when a
        # row fails to parse.
        matrixFile.close()

    ckerneldata.KernelData.__init__(self, matrix)
    if "labelsFile" in args:
        self.attachLabels(labels.Labels(args["labelsFile"], **args))
    else:
        self.attachLabels(labels.Labels(None, patternID=patternID))
def __init__(self, file, **args):
    """Store the input handle and the optional read filters.

    file -- either a path (a string) or an already-open file-like object.
            A path must exist and is opened with ``myio.myopen``; any
            other object is stored unchanged as the file handle.

    Keyword arguments:
      classes  -- stored as ``self.classesToRead`` (default: empty list)
      patterns -- stored as ``self.patternsToRead`` (default: None)
      features -- stored as ``self.featuresToRead`` (default: empty list)

    Raises ValueError if ``file`` is a path that does not exist.
    """
    if isinstance(file, str):
        if not os.path.exists(file):
            raise ValueError("file does not exist at %s" % file)
        self._fileHandle = myio.myopen(file)
    else:
        self._fileHandle = file

    # A fresh list is created on each call, so instances never share the
    # default containers.
    self.classesToRead = args.get('classes', [])
    self.patternsToRead = args.get('patterns', None)
    self.featuresToRead = args.get('features', [])
def __init__(self, file, **args):
    """Store the input handle and the optional read filters.

    file -- either a path (a string) or an already-open file-like object.
            A path must exist and is opened with ``myio.myopen``; any
            other object is stored unchanged as the file handle.

    Keyword arguments:
      classes  -- stored as ``self.classesToRead`` (default: empty list)
      patterns -- stored as ``self.patternsToRead`` (default: None)
      features -- stored as ``self.featuresToRead`` (default: empty list)

    Raises ValueError if ``file`` is a path that does not exist.
    """
    if isinstance(file, str):
        if not os.path.exists(file):
            raise ValueError("file does not exist at %s" % file)
        self._fileHandle = myio.myopen(file)
    else:
        self._fileHandle = file

    # A fresh list is created on each call, so instances never share the
    # default containers.
    self.classesToRead = args.get('classes', [])
    self.patternsToRead = args.get('patterns', None)
    self.featuresToRead = args.get('features', [])