示例#1
0
    def buildDataset(cls, all_results: Dataset, testie: Testie) -> List[tuple]:
        """Build one (X, y) learning dataset per result type.

        Every run whose result mapping is neither ``None`` nor empty
        contributes one feature row (the values of ``run.variables``, in
        iteration order) and, for each result type it reports, the mean of
        that type's results as target value.

        Columns whose format is ``str`` are replaced by integer codes; the
        distinct original values of such a column are stored, in first-seen
        order, in ``dtype['values'][column]`` (``None`` for other columns)
        and the column format is switched to ``int``.

        A run that does not report a given result type is left out of that
        type's feature matrix, so in every returned tuple the rows of ``X``
        and the entries of ``y`` refer to the same runs, in the same order.

        :param all_results: mapping of run -> {result type -> results}.
        :param testie: provides ``variables.dtype()``, a dict holding at
            least a ``'formats'`` list; that dict is modified in place.
        :return: list of ``(result_type, X, y, dtype)`` tuples, ordered by
            first appearance of the result type; all tuples share ``dtype``.
        """
        dtype = testie.variables.dtype()
        targets = OrderedDict()  # result type -> mean result of each run
        row_ids = OrderedDict()  # result type -> dataset row of each mean
        dataset = []
        for run, results_types in all_results.items():
            if results_types is None or len(results_types) == 0:
                continue
            row_id = len(dataset)
            dataset.append(list(run.variables.values()))
            for result_type, results in results_types.items():
                targets.setdefault(result_type, []).append(np.mean(results))
                row_ids.setdefault(result_type, []).append(row_id)

        dtype['values'] = [None] * len(dtype['formats'])
        for col, fmt in enumerate(dtype['formats']):
            if fmt is not str:
                continue
            # Encode a string column as the index of each value in the
            # ordered set of distinct values seen so far.
            dtype['formats'][col] = int
            values = OrderedSet()
            for row in dataset:
                values.add(row[col])
                row[col] = values.index(row[col])
            dtype['values'][col] = list(values)
        X = np.array(dataset, ndmin=2)

        lset = []
        for result_type, means in targets.items():
            rows = row_ids[result_type]
            # Only sub-select (and copy) when some runs lack this result
            # type; otherwise every type shares the full matrix.
            X_type = X if len(rows) == len(dataset) else X[rows]
            lset.append((result_type, X_type, np.array(means), dtype))
        return lset
示例#2
0
文件: build.py 项目: hamidgh09/npf
    def writeversion(self,
                     testie,
                     all_results: Dataset,
                     allow_overwrite: bool = False,
                     kind=False,
                     reload=True):
        """Write the results of ``testie`` to its result file(s).

        :param testie: test script the results belong to; used to derive the
            result file name and to load previously stored results.
        :param all_results: results to write. When ``kind`` is true it is
            iterated as a mapping of kind name -> results of that kind.
        :param allow_overwrite: passed through to ``_writeversion``.
        :param kind: when true, one file per kind is written, named
            ``<result file name>-<kind name>``; otherwise a single file.
        :param reload: when true, the stored results are loaded (bypassing
            the cache) and updated with ``all_results`` before writing. When
            false, this merge is still forced if ``all_results`` holds fewer
            entries than the (cached) stored results.
        """
        if not reload and all_results:
            previous = self.load_results(testie=testie, kind=kind, cache=True)
            if previous and len(all_results) < len(previous):
                print(
                    "ERROR ! Have less results than before. Forcing update write !"
                )
                # Do not return here: switching to reload mode merges the new
                # results into the stored ones, so the announced write
                # actually happens and no stored entry is dropped.
                reload = True
        if reload:
            stored = self.load_results(testie=testie, kind=kind, cache=False)
            if stored:
                stored.update(all_results)
                all_results = stored

        if kind:
            # Distinct loop names: do not clobber the ``kind`` parameter.
            for kind_name, kind_results in all_results.items():
                filename = self.__resultFilename(testie) + '-' + kind_name
                self._writeversion(filename, kind_results, allow_overwrite)
        else:
            filename = self.__resultFilename(testie)
            self._writeversion(filename, all_results, allow_overwrite)
示例#3
0
 def writeversion(self, script, all_results: Dataset):
     """Write ``all_results`` to the result file of ``script``, one line per run.

     Each line has the form ``key:value,...={type:v1,v2,...},{type:...}``.
     The left part lists the run's variables sorted by key; a tuple value is
     written as its second element and every colon inside a value is
     escaped with a backslash. The right part holds one brace group per
     result type; a type whose results are ``None`` gets an empty list.

     The directory of the result file is created when missing. If that
     fails, an error is printed and opening the file is still attempted.

     :param script: passed to the result file name helper.
     :param all_results: mapping of run -> {result type -> results}.
     """
     filename = self.__resultFilename(script)
     directory = os.path.dirname(filename)
     # A bare file name has no directory part and os.makedirs('') raises.
     if directory:
         try:
             os.makedirs(directory, exist_ok=True)
         except OSError:
             print("Error : could not create %s" % directory)
     # The context manager closes the file even if a run fails to serialize.
     with open(filename, 'w') as f:
         for run, results in all_results.items():
             variables = []
             for key, val in sorted(run.variables.items()):
                 if type(val) is tuple:
                     val = val[1]
                 variables.append(key + ":" + str(val).replace(':', '\\:'))
             type_results = []
             for result_type, values in results.items():
                 if values is None:
                     str_results = []
                 else:
                     str_results = [str(item) for item in values]
                 type_results.append(result_type + ':' + ','.join(str_results))
             f.write(','.join(variables) + "={" + '},{'.join(type_results) + "}\n")
示例#4
0
    def buildDataset(cls, all_results: Dataset, testie: Testie):
        """Build the learning dataset (features ``X``, targets ``y``) of a result set.

        Every run that has results contributes one feature row (the values
        of ``run.variables``, in iteration order) and the mean of its
        results as target. Runs whose results are ``None`` or empty are
        skipped: an empty list has no mean and would put NaN in ``y``.

        Columns whose format is ``str`` are replaced by integer codes (the
        index of each value among the distinct values of the column, in
        first-seen order), and the matching entry of ``dtype['formats']``
        is switched to ``int``.

        :param all_results: mapping of run -> results of that run.
        :param testie: provides ``variables.dtype()``, a dict holding at
            least a ``'formats'`` list.
        :return: ``(X, y)`` where ``X`` is an array of at least two
            dimensions with one row per kept run and ``y`` is a structured
            array with a single float field named ``'result'``.
        """
        dtype = testie.variables.dtype()
        means = []
        dataset = []
        for run, results in all_results.items():
            if results is None or np.size(results) == 0:
                continue
            dataset.append(list(run.variables.values()))
            means.append(np.mean(results))

        for col, fmt in enumerate(dtype['formats']):
            if fmt is not str:
                continue
            dtype['formats'][col] = int
            values = OrderedSet()
            for row in dataset:
                values.add(row[col])
                row[col] = values.index(row[col])
        X = np.array(dataset, ndmin=2)
        return X, np.array(means, dtype=[('result', float)])
示例#5
0
    def run(build: Build,
            all_results: Dataset,
            testie: Testie,
            max_depth=3,
            filename=None):
        """Fit a decision-tree regressor on the results and print statistics.

        Prints, in order: the size of the learning dataset, the importance
        of every feature in the fitted tree, the feature row with the
        highest result, and, for every variable taking more than one value,
        the mean result and the std/mean ratio for each of its values.
        Unless ``max_depth`` is ``None`` or above 8, the tree is also
        rendered to a file.

        :param build: used for ``result_path()`` when ``filename`` is unset.
        :param all_results: mapping of run -> results of that run.
        :param testie: provides the variable names (``variables.dtype()``)
            and the file name the default output path is derived from.
        :param max_depth: ``max_depth`` given to the regressor.
        :param filename: output file of the tree rendering; its extension
            selects the output format. Defaults to
            ``build.result_path(testie.filename, 'pdf', suffix='_clf')``.
        """
        print("Building dataset...")
        X, y = Statistics.buildDataset(all_results, testie)
        print("Learning dataset built with %d samples and %d features..." %
              (X.shape[0], X.shape[1]))
        clf = tree.DecisionTreeRegressor(max_depth=max_depth)
        clf = clf.fit(X, y)

        if max_depth is None or max_depth > 8:
            print("No tree graph when maxdepth is > 8")
        else:
            dot_data = tree.export_graphviz(
                clf,
                out_file=None,
                filled=True,
                rounded=True,
                special_characters=True,
                feature_names=testie.variables.dtype()['names'])
            graph = pydotplus.graph_from_dot_data(dot_data)
            if filename:
                out_path = filename
            else:
                out_path = build.result_path(
                    testie.filename, 'pdf', suffix='_clf')
            # The extension, without its leading dot, is the output format.
            graph.write(out_path, format=os.path.splitext(out_path)[1][1:])
            print("Decision tree visualization written to %s" % out_path)

        print("")
        print("Feature importances :")
        # noinspection PyUnresolvedReferences
        for key, importance in zip(testie.variables.dtype()['names'],
                                   clf.feature_importances_):
            print("  %s : %0.2f" % (key, importance))

        # Distinct values taken by each variable over all runs.
        vars_values = {}
        for run, _ in all_results.items():
            for k, v in run.variables.items():
                vars_values.setdefault(k, set()).add(v)

        print('')
        print("Better :")
        best = X[y['result'].argmax()]
        print("  ", end='')
        # Variable names are taken from the first run; columns of X follow
        # the same order.
        first_run, _ = next(iter(all_results.items()))
        for i, (k, _) in enumerate(first_run.variables.items()):
            print("%s = %s, " % (k, best[i]), end='')
        print(' : %.02f' % y['result'].max())

        print('')
        print("Means and std/mean per variables :")
        for k, vals in vars_values.items():
            # A variable with a single value cannot explain any variation.
            if len(vals) == 1:
                continue
            print("%s :" % k)
            for v in sorted(vals):
                tot = 0
                std = 0
                n = 0
                for run, results in all_results.items():
                    if run.variables[k] == v and results is not None:
                        tot += np.mean(results)
                        std += np.std(results)
                        n += 1
                if n == 0:
                    print("  %s : None" % v)
                else:
                    mean = tot / n
                    print("  %s : (%.02f,%.02f), " % (v, mean,
                                                      std / n / mean))
            print("")