示例#1
0
def readARFF(filename):
    featureSet = IdSet(1)
    classSet = IdSet(0)
    f = open(filename,"rt")
    inData = False
    lines = f.readlines()
    counter = ProgressCounter(len(lines),"ARFFLine")
    examples = []
    for line in lines:
        counter.update(string="Processing line " + str(counter.current + 1) + ": ")
        line = line.strip()
        if len(line) == 0 or line[0] == "%":
            continue
        elif line[0] == "@":
            #print line
            category = line.split()[0].lower()
            if category == "@attribute":
                category, name, type = line.split()
                assert(not inData)
                if name.lower() == "class":
                    name = name.lower()
                    classNames = type[1:-1].split(",")
                    assert(len(classNames)==2)
                    classSet.defineId(classNames[0].strip(),1)
                    classSet.defineId(classNames[1].strip(),-1)
                featureSet.getId(name)
            elif category.lower() == "@relation":
                assert(not inData)
            elif category == "@data":
                inData = True
        else:
            assert(inData)
            count = 1
            features = {}
            for column in line.split(","):
                if featureSet.getName(count) != "class":
                    features[count] = float(column)
                else:
                    classId = classSet.getId(column, False)
                    assert(classId != None)
                count += 1
            exampleCount = str(len(examples))
            exampleId = "BreastCancer.d" + exampleCount + ".s0.x0"
            examples.append([exampleId,classId,features,{}])
                    
    return examples
示例#2
0
def getClassSet(rows, classSet=None):
    from Core.IdSet import IdSet
    classNames = set()
    for row in rows:
        classNames.add(row["class"])
        classNames.add(row["prediction"])
    
    # In the case of multiclass, give integer id:s for the classes
    if classSet == None:
        classSet = IdSet()
        assert(not ("1" in classNames and "neg" in classNames))
        assert("1" in classNames or "neg" in classNames)
        if "1" in classNames:
            classSet.defineId("1",1)
        else:
            classSet.defineId("neg",1)
    for i in sorted(list(classNames)):
        if i != "1" and i != "neg":
            classSet.getId(i)
    return classSet
示例#3
0
def getClassSet(rows, classSet=None):
    from Core.IdSet import IdSet
    classNames = set()
    for row in rows:
        classNames.add(row["class"])
        classNames.add(row["prediction"])
    
    # In the case of multiclass, give integer id:s for the classes
    if classSet == None:
        classSet = IdSet()
        assert(not ("1" in classNames and "neg" in classNames))
        assert("1" in classNames or "neg" in classNames)
        if "1" in classNames:
            classSet.defineId("1",1)
        else:
            classSet.defineId("neg",1)
    for i in sorted(list(classNames)):
        if i != "1" and i != "neg":
            classSet.getId(i)
    return classSet