Exemplo n.º 1
0
def encodePP(pps):
    """Encode the names of the preprocessors in ``pps`` into one string.

    Each item of ``pps`` is indexed as ``pp[1]`` to obtain the preprocessor
    object; its name is looked up with ``getobjectname(pp[1], "")``.

    Returns the names concatenated as ``"_name1_name2..."`` (``""`` when
    ``pps`` is empty), or ``"*"`` as soon as a preprocessor with an empty
    name is met.
    """
    # The accumulator must not reuse the name ``pps``: rebinding the
    # parameter to "" would make the loop iterate over an empty string and
    # the function would always return "".
    encoded = ""
    for pp in pps:
        objname = getobjectname(pp[1], "")
        if not len(objname):
            return "*"
        encoded += "_" + objname
    return encoded
Exemplo n.º 2
0
    def saveToFiles(self, learners, filename):
        """Pickle the results of every learner not marked in ``self.loaded``.

        ``filename`` is a %-format string applied to the learner's name
        (``getobjectname(learners[i], "*")``); the result is appended to
        ``".\\cache\\"``.  A learner whose file name contains ``"*"`` is
        skipped.  The ``cache`` directory is created when missing.

        For learner ``i`` the file holds one list with an entry per item of
        ``self.results``:
        ``((actualClass, iterationNumber), (classes[i], probabilities[i]))``.
        """
        for i in range(len(learners)):
            if self.loaded[i]:
                continue

            fname = ".\\cache\\" + filename % getobjectname(learners[i], "*")
            if "*" in fname:
                continue

            if not os.path.isdir("cache"):
                os.mkdir("cache")
            f = open(fname, "wb")
            try:
                pickler = pickle.Pickler(f, 1)
                pickler.dump([((x.actualClass, x.iterationNumber),
                               (x.classes[i], x.probabilities[i]))
                              for x in self.results])
            finally:
                # Release the handle even when pickling fails.
                f.close()
Exemplo n.º 3
0
    def loadFromFiles(self, learners, filename):
        """Try to fill ``self.results`` from cached files, one per learner.

        ``filename`` is a %-format string applied to the learner's name
        (``getobjectname(learners[i], "*")``); the result is appended to
        ``".\\cache\\"``.  Each file is expected to hold a list whose entry
        ``ex`` is ``((actualClass, iterationNumber), (class, probabilities))``.
        The first pair must equal the corresponding item of ``self.results``;
        the second pair is stored with ``setResult(i, ...)``.

        ``self.loaded`` is rebuilt with one flag per learner: 1 when its file
        was read and matched, 0 when anything failed (missing file, bad
        pickle, mismatching data, ...).

        Returns True when every learner was loaded (also when ``learners`` is
        empty), False otherwise.
        """
        self.loaded = []

        for i in range(len(learners)):
            f = None
            try:
                f = open(
                    ".\\cache\\" + filename % getobjectname(learners[i], "*"),
                    "rb")
                # NOTE(security): unpickling can execute arbitrary code; the
                # cache directory must only contain trusted files.
                d = cPickle.load(f)
                for ex in range(len(self.results)):
                    tre = self.results[ex]
                    if (tre.actualClass, tre.iterationNumber) != d[ex][0]:
                        raise SystemError(
                            "mismatching example tables or sampling")
                    tre.setResult(i, d[ex][1][0], d[ex][1][1])
                self.loaded.append(1)
            except Exception:
                # Best effort: any failure only marks this learner as not
                # loaded.
                self.loaded.append(0)
            finally:
                if f:
                    f.close()

        return 0 not in self.loaded
0
def testWithIndices(learners,
                    examples,
                    indices,
                    indicesrandseed="*",
                    pps=[],
                    callback=None,
                    **argkw):
    """Train and test ``learners`` on folds of ``examples`` given by ``indices``.

    For every fold ``f`` in ``range(max(indices) + 1)`` the examples with
    ``indices[i] != f`` form the learning set and those with
    ``indices[i] == f`` the test set.  Folds with an empty learning or test
    set are skipped.

    Parameters:
        learners: sequence of callables invoked as
            ``learner(learnset, weight)``; each returns a classifier invoked
            as ``classifier(example, orange.GetBoth)``.
        examples: passed through ``demangleExamples``, which yields the
            example table and a weight (presumably a weight meta-attribute
            id -- TODO confirm).
        indices: fold index of each example, parallel to ``examples``.
        indicesrandseed: becomes part of the cache file name; the default
            ``"*"`` disables caching.
        pps: sequence of ``(mode, preprocessor)`` pairs.  Mode ``"B"`` is
            applied to both sets (before the other modes), ``"L"`` to the
            learning set, ``"T"`` to the test set, and ``"LT"`` is called as
            ``preprocessor(learnset, testset)`` and returns both.  The
            default list is never mutated here.
        callback: if given, called without arguments after each processed
            fold.
        **argkw: ``verbose``, ``cache``, ``storeclassifiers`` (or
            ``storeClassifiers``) and ``storeExamples`` are recognised.

    Returns:
        The populated ``ExperimentResults`` instance.

    Raises:
        SystemError: when there are no examples, the domain has no class
            variable, a set is empty after preprocessing, or a classifier
            returns an unknown value.
    """
    verb = argkw.get("verbose", 0)
    cache = argkw.get("cache", 0)
    storeclassifiers = argkw.get("storeclassifiers", 0) or argkw.get(
        "storeClassifiers", 0)
    # Storing classifiers requires actually training them, so it disables
    # the cache.
    cache = cache and not storeclassifiers

    examples, weight = demangleExamples(examples)
    nLrn = len(learners)

    if not examples:
        raise SystemError("Test data set with no examples")
    if not examples.domain.classVar:
        # A bare string cannot be raised; use the same exception type as
        # the other checks in this function.
        raise SystemError("Test data set without class attribute")

    # NOTE: a former check that rejected every preprocessor whose mode was
    # not "L" is disabled; the modes "B", "T" and "LT" are handled below.

    nIterations = max(indices) + 1
    classVar = examples.domain.classVar
    if classVar.varType == orange.VarTypes.Discrete:
        values = list(classVar.values)
        basevalue = classVar.baseValue
        conv = int
    else:
        basevalue = values = None
        conv = float

    testResults = ExperimentResults(nIterations,
                                    [getobjectname(l) for l in learners],
                                    values, weight != 0, basevalue)
    testResults.results = [
        TestedExample(indices[i], conv(examples[i].getclass()), nLrn,
                      examples[i].getweight(weight))
        for i in range(len(examples))
    ]

    if argkw.get("storeExamples", 0):
        testResults.examples = examples

    # Cache file name template; the remaining "%s" is later filled with the
    # learner's name.
    ccsum = hex(examples.checksum())[2:]
    ppsp = encodePP(pps)
    fnstr = "{TestWithIndices}_%s_%s%s-%s" % ("%s", indicesrandseed, ppsp,
                                              ccsum)
    # A "*" marks a component that cannot be named, so caching is unsafe.
    if "*" in fnstr:
        cache = 0

    if cache and testResults.loadFromFiles(learners, fnstr):
        printVerbose("  loaded from cache", verb)
    else:
        for fold in range(nIterations):
            # learning
            learnset = examples.selectref(indices, fold, negate=1)
            if not len(learnset):
                continue
            testset = examples.selectref(indices, fold, negate=0)
            if not len(testset):
                continue

            # Preprocessors for both sets run first ...
            for pp in pps:
                if pp[0] == "B":
                    learnset = pp[1](learnset)
                    testset = pp[1](testset)

            # ... followed by the set-specific ones.
            for pp in pps:
                if pp[0] == "L":
                    learnset = pp[1](learnset)
                elif pp[0] == "T":
                    testset = pp[1](testset)
                elif pp[0] == "LT":
                    (learnset, testset) = pp[1](learnset, testset)

            if not learnset:
                raise SystemError("no training examples after preprocessing")

            if not testset:
                raise SystemError("no test examples after preprocessing")

            # Train only the learners whose results were not loaded from
            # the cache.
            classifiers = [None] * nLrn
            for i in range(nLrn):
                if not cache or not testResults.loaded[i]:
                    classifiers[i] = learners[i](learnset, weight)
            if storeclassifiers:
                testResults.classifiers.append(classifiers)

            # testing
            # tcn walks the test set in step with the examples of this fold.
            tcn = 0
            for i in range(len(examples)):
                if (indices[i] == fold):
                    # This is to prevent cheating:
                    ex = orange.Example(testset[tcn])
                    ex.setclass("?")
                    tcn += 1
                    for cl in range(nLrn):
                        if not cache or not testResults.loaded[cl]:
                            cr = classifiers[cl](ex, orange.GetBoth)
                            if cr[0].isSpecial():
                                raise SystemError(
                                    "Classifier %s returned unknown value" % (
                                        classifiers[cl].name or ("#%i" % cl)))
                            testResults.results[i].setResult(cl, cr[0], cr[1])
            if callback:
                callback()
        if cache:
            testResults.saveToFiles(learners, fnstr)

    return testResults