Пример #1
0
    def constructFromFile(self, fileName, **args):
        """Fill a kernel matrix from a delimited text file and attach labels.

        Each non-comment line of the file becomes one row of the matrix.
        Lines whose first character is ``%`` or ``#`` are skipped.  The first
        remaining line is treated as a header (and skipped) when its last
        token cannot be parsed as a float, or when the caller passes a truthy
        ``headerRow`` or ``gistFormat`` keyword.

        Keyword arguments read here:
            idColumn -- defaults to 0.  When it is ``None`` every token on a
                line is numeric; for any other value the first token of each
                line is stored as the pattern ID and the numeric values start
                at the second token.
            headerRow, gistFormat -- if truthy, the first non-comment line is
                skipped.
            labelsFile -- if present, labels are built from it (all keyword
                arguments are forwarded to ``labels.Labels``); otherwise the
                labels are built from the collected pattern IDs.
        """
        matrix = ckernel.KernelMatrix()
        # NOTE(review): presumably hands ownership of the wrapped object to
        # KernelData (SWIG-style flag) -- confirm against ckernel.
        matrix.thisown = 0
        patternID = []
        delim = misc.getDelim(fileName)
        idColumn = args.get('idColumn', 0)
        if idColumn is None:
            firstColumn = 0
        else:
            firstColumn = 1
        skipFirstRow = bool(args.get('headerRow') or args.get('gistFormat'))

        matrixFile = myio.myopen(fileName)
        try:
            firstRow = True
            for line in matrixFile:
                # skip comments:
                if line[0] in ("%", "#"):
                    continue
                tokens = line.split(delim)
                # check if the file is in gist format / has a header row:
                if firstRow:
                    firstRow = False
                    if skipFirstRow:
                        continue
                    try:
                        float(tokens[-1])
                    except (ValueError, IndexError):
                        # non-numeric (or empty) first row: treat as header
                        continue
                values = arrayWrap.floatVector(
                    [float(token) for token in tokens[firstColumn:]])
                matrix.addRow(values)
                if idColumn is not None:
                    patternID.append(tokens[0])
        finally:
            # assumes myio.myopen returns a file-like object with close()
            matrixFile.close()

        ckerneldata.KernelData.__init__(self, matrix)
        if 'labelsFile' in args:
            self.attachLabels(labels.Labels(args['labelsFile'], **args))
        else:
            self.attachLabels(labels.Labels(None, patternID=patternID))
Пример #2
0
def load_libsvm_format(file_name, **args):
    """
    Load a dataset from a file in libsvm format
    returns an instance of PyVectorDataSet
    If you want to use the data with a SparseDataSet, you can directly
    do it using the SparseDataSet constructor.

    Each non-blank line is ``<label> <index>:<value> ...``.  Feature indices
    in the file are 1-based: index ``k`` is stored in column ``k - 1`` of the
    dense array, and the number of columns is the largest index seen.

    Keyword arguments:
        regression -- if truthy, labels are kept as floats and attached as
            numeric labels; otherwise each label is converted to the string
            form of its integer value.

    Raises ValueError if no file exists at ``file_name``.
    """
    regression = args.get('regression', False)
    if not os.path.exists(file_name):
        raise ValueError("file doesn't exist at %s" % file_name)

    # Single pass over the file: collect the labels and the sparse entries
    # of every row, tracking the largest feature index along the way.
    raw_labels = []
    rows = []
    num_features = 0
    file_handle = myio.myopen(file_name)
    try:
        for line in file_handle:
            tokens = line.split()
            if not tokens:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            if regression:
                raw_labels.append(float(tokens[0]))
            else:
                raw_labels.append(str(int(float(tokens[0]))))
            entries = []
            for token in tokens[1:]:
                index, value = token.split(':')
                index = int(index)
                num_features = max(num_features, index)
                # NOTE(review): an index of 0 maps to column -1 (the last
                # column), as in the original code -- confirm inputs are
                # always 1-based.
                entries.append((index - 1, float(value)))
            rows.append(entries)
    finally:
        # assumes myio.myopen returns a file-like object with close()
        file_handle.close()

    # fill in the array:
    X = numpy.zeros((len(raw_labels), num_features), float)
    for i, entries in enumerate(rows):
        for column, value in entries:
            X[i][column] = value

    data = PyVectorDataSet(X)
    if regression:
        data.attachLabels(Labels(raw_labels, numericLabels=True))
    else:
        data.attachLabels(Labels(raw_labels))
    return data
Пример #3
0
def load_libsvm_format(file_name, **args):
    """
    Load a dataset from a file in libsvm format
    returns an instance of PyVectorDataSet
    If you want to use the data with a SparseDataSet, you can directly
    do it using the SparseDataSet constructor.

    Every non-blank line has the form ``<label> <index>:<value> ...``.
    Indices in the file are 1-based; index ``k`` lands in column ``k - 1``
    of the dense array, whose width is the largest index encountered.

    Keyword arguments:
        regression -- when truthy the labels stay floats and are attached
            with ``numericLabels=True``; otherwise every label becomes the
            string of its integer value.

    Raises ValueError when ``file_name`` does not exist.
    """
    if not os.path.exists(file_name):
        raise ValueError("file doesn't exist at %s" % file_name)
    regression = args.get('regression', False)

    # Read the file once, remembering each row's (column, value) pairs so
    # the dense array can be sized after the largest index is known.
    parsed_labels = []
    sparse_rows = []
    num_features = 0
    file_handle = myio.myopen(file_name)
    try:
        for line in file_handle:
            tokens = line.split()
            if not tokens:
                # blank line: nothing to parse
                continue
            target = float(tokens[0])
            if regression:
                parsed_labels.append(target)
            else:
                parsed_labels.append(str(int(target)))
            row = []
            for token in tokens[1:]:
                feature, value = token.split(':')
                feature = int(feature)
                if feature > num_features:
                    num_features = feature
                # NOTE(review): feature index 0 would address column -1,
                # exactly as the original did -- confirm 1-based input.
                row.append((feature - 1, float(value)))
            sparse_rows.append(row)
    finally:
        # assumes the handle returned by myio.myopen supports close()
        file_handle.close()

    # fill in the array:
    X = numpy.zeros((len(parsed_labels), num_features), float)
    for i, row in enumerate(sparse_rows):
        for column, value in row:
            X[i][column] = value

    data = PyVectorDataSet(X)
    if regression:
        label_object = Labels(parsed_labels, numericLabels=True)
    else:
        label_object = Labels(parsed_labels)
    data.attachLabels(label_object)
    return data
Пример #4
0
    def constructFromFile(self, fileName, **args):
        """Read a kernel matrix from a delimited text file and attach labels.

        Every non-comment line contributes one matrix row; lines that start
        with ``%`` or ``#`` are ignored.  The first remaining line is skipped
        as a header if its last token is not a valid float, or if the caller
        supplies a truthy ``headerRow`` or ``gistFormat`` keyword.

        Keyword arguments read here:
            idColumn -- defaults to 0.  ``None`` means the file has no ID
                column and all tokens are numeric; any other value means the
                first token of each line is recorded as the pattern ID and
                the values begin at the second token.
            headerRow, gistFormat -- truthy values force the first
                non-comment line to be skipped.
            labelsFile -- when given, labels come from that file (all keyword
                arguments are passed on to ``labels.Labels``); otherwise
                labels are created from the collected pattern IDs.
        """
        matrix = ckernel.KernelMatrix()
        # NOTE(review): looks like a SWIG ownership flag so that KernelData
        # takes over the wrapped object -- confirm against ckernel.
        matrix.thisown = 0
        patternID = []
        delim = misc.getDelim(fileName)
        idColumn = args.get("idColumn", 0)
        firstColumn = 0 if idColumn is None else 1
        forceSkipFirst = bool(args.get("headerRow") or args.get("gistFormat"))

        matrixFile = myio.myopen(fileName)
        try:
            firstRow = True
            for line in matrixFile:
                # skip comments:
                if line[0] in ("%", "#"):
                    continue
                tokens = line.split(delim)
                # check if the file is in gist format / has a header row:
                if firstRow:
                    firstRow = False
                    if forceSkipFirst:
                        continue
                    try:
                        float(tokens[-1])
                    except (ValueError, IndexError):
                        # first row is not numeric (or is empty): a header
                        continue
                values = arrayWrap.floatVector([float(token) for token in tokens[firstColumn:]])
                matrix.addRow(values)
                if idColumn is not None:
                    patternID.append(tokens[0])
        finally:
            # assumes myio.myopen returns a file-like object with close()
            matrixFile.close()

        ckerneldata.KernelData.__init__(self, matrix)
        if "labelsFile" in args:
            self.attachLabels(labels.Labels(args["labelsFile"], **args))
        else:
            self.attachLabels(labels.Labels(None, patternID=patternID))
Пример #5
0
    def __init__(self, file, **args):
        """Store the input handle and the optional read filters.

        ``file`` is either a path (a ``str``), which is opened through
        ``myio.myopen``, or any other object, which is stored unchanged as
        the handle to read from.

        Keyword arguments:
            classes -- stored as ``classesToRead`` (default: empty list).
            patterns -- stored as ``patternsToRead`` (default: ``None``).
            features -- stored as ``featuresToRead`` (default: empty list).

        Raises ValueError if ``file`` is a path and nothing exists there.
        """
        if isinstance(file, str):
            if not os.path.exists(file):
                raise ValueError("file does not exist at %s" % file)
            self._fileHandle = myio.myopen(file)
        else:
            # anything that is not a path is used as the handle as-is
            self._fileHandle = file

        self.classesToRead = args.get('classes', [])
        self.patternsToRead = args.get('patterns', None)
        self.featuresToRead = args.get('features', [])
Пример #6
0
    def __init__(self, file, **args):
        """Remember where to read from and which parts of the data to read.

        ``file`` may be a path given as a ``str`` (opened with
        ``myio.myopen``) or any other object, which is kept unchanged as
        the handle.

        Keyword arguments:
            classes -- saved as ``classesToRead``; defaults to an empty list.
            patterns -- saved as ``patternsToRead``; defaults to ``None``.
            features -- saved as ``featuresToRead``; defaults to an empty
                list.

        Raises ValueError when ``file`` is a path that does not exist.
        """
        if not isinstance(file, str):
            # not a path: keep the caller's object as the handle
            self._fileHandle = file
        elif os.path.exists(file):
            self._fileHandle = myio.myopen(file)
        else:
            raise ValueError("file does not exist at %s" % file)

        self.classesToRead = args.get('classes', [])
        self.patternsToRead = args.get('patterns', None)
        self.featuresToRead = args.get('features', [])