Example #1
    def run(self, args, opts):
        # Exactly one classifier name is accepted on the command line.
        if len(args) < 1:
            raise UsageError()
        elif len(args) > 1:
            raise UsageError(
                "running 'scrapy benchmark' with more than one argument is not supported"
            )
        classifier_name = args[0]
        status = Status()
        CF = ClassifierFactory(status.classifiers[classifier_name])
        # Build the data set from reviewed items, unreviewed items, or both;
        # note that no data set is created when neither flag is given.
        if opts.reviewed and opts.unreviewed:
            CF.create_data_set("both")
        elif opts.reviewed:
            CF.create_data_set("reviewed")
        elif opts.unreviewed:
            CF.create_data_set("unreviewed")
        results = []  # collects benchmarks for the optional comparison below
        # Benchmark a logistic-regression classifier over the feature set
        # registered for the named classifier.
        lc = CF.create_classifier(
            LogisticRegression(C=1e5),
            status.classifiers[classifier_name]['features']())
        lc.benchmark(opts.topn, opts.print_cm, opts.print_report, verbose=True)


##        # Optional: benchmark several more linear models for comparison.
##        # Note: scikit-learn >= 0.21 uses max_iter instead of n_iter.
##        for clf, name in (
##            (RidgeClassifier(tol=1e-2, solver="lsqr"), "Ridge Classifier"),
##            (Perceptron(max_iter=50), "Perceptron"),
##            (PassiveAggressiveClassifier(max_iter=50), "Passive-Aggressive")
##            ):
##            print('=' * 80)
##            print(name)
##            c = CF.create_classifier(clf, status.classifiers[classifier_name]['features']())
##            results.append(c.benchmark(opts.topn, opts.print_cm, opts.print_report, verbose=True))

# Multiple classifier comparison: plots score, training time, and test time
# for each benchmark collected in `results` (needs numpy as np and
# matplotlib.pyplot as plt).
##        indices = np.arange(len(results))
##        results = [[x[i] for x in results] for i in range(4)]
##        clf_names, score, training_time, test_time = results
##        training_time = np.array(training_time) / np.max(training_time)
##        test_time = np.array(test_time) / np.max(test_time)
##        plt.figure(figsize=(12, 8))
##        plt.title("Score")
##        plt.barh(indices, score, .2, label="score", color='r')
##        plt.barh(indices + .3, training_time, .2, label="training time", color='g')
##        plt.barh(indices + .6, test_time, .2, label="test time", color='b')
##        plt.yticks(())
##        plt.legend(loc='best')
##        plt.subplots_adjust(left=.25)
##        plt.subplots_adjust(top=.95)
##        plt.subplots_adjust(bottom=.05)
##
##        for i, c in zip(indices, clf_names):
##            plt.text(-.3, i, c)
##        plt.show()
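
The commented-out plotting code above assumes each benchmark returns a (name, score, training_time, test_time) tuple, as its unpacking implies. A self-contained sketch of the same comparison chart, using hypothetical benchmark results in place of the factory plumbing:

import numpy as np
import matplotlib.pyplot as plt

# Hypothetical benchmark results in the (name, score, train_time, test_time)
# shape implied by the commented-out block above.
results = [
    ("Ridge Classifier", 0.91, 0.8, 0.02),
    ("Perceptron", 0.87, 0.3, 0.01),
    ("Passive-Aggressive", 0.89, 0.4, 0.01),
]

indices = np.arange(len(results))
clf_names, score, training_time, test_time = [list(col) for col in zip(*results)]
# Normalize the timings so all three bar series share one axis.
training_time = np.array(training_time) / np.max(training_time)
test_time = np.array(test_time) / np.max(test_time)

plt.figure(figsize=(12, 8))
plt.title("Score")
plt.barh(indices, score, .2, label="score", color='r')
plt.barh(indices + .3, training_time, .2, label="training time", color='g')
plt.barh(indices + .6, test_time, .2, label="test time", color='b')
plt.yticks(())
plt.legend(loc='best')
plt.subplots_adjust(left=.25, top=.95, bottom=.05)
for i, name in zip(indices, clf_names):
    plt.text(-.3, i, name)
plt.show()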
Example #2
    def __init__(self):

        self.status = Status()
        self.classifiers = []
        self.exporters = {}
        # Train one logistic-regression classifier per registered classifier,
        # using both reviewed and unreviewed items as the data set.
        for classifier in self.status.classifiers.keys():
            CF = ClassifierFactory(self.status.classifiers[classifier])
            CF.create_data_set("both")
            lc = CF.create_classifier(
                LogisticRegression(C=1e5),
                self.status.classifiers[classifier]['features']())
            lc.fit()
            self.classifiers.append((classifier, lc))

        # Sort ascending by estimated accuracy, so the weakest classifier
        # comes first and is selected for export.
        self.classifiers = sorted(
            self.classifiers,
            key=lambda a: a[1].estimate_accuracy(5, verbose=True))
        print("Classifier {0} needs the most improvement; selected for export".format(
            self.classifiers[0][0]))
        # Open one JSON exporter per classification handled by that classifier.
        for classification in self.status.classifiers[self.classifiers[0][0]]['classifications']:
            f = open("{0}.json".format(classification), "wb")
            self.exporters[classification] = JsonItemExporter(f)
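
Scrapy's JsonItemExporter writes nothing until its exporting life cycle is driven with start_exporting(), export_item(), and finish_exporting(). A minimal sketch of how one of the exporters opened above might be used; the file name and item contents here are hypothetical:

from scrapy.exporters import JsonItemExporter

# Exporters require a binary file handle; items may be Item objects or dicts.
with open("example.json", "wb") as f:
    exporter = JsonItemExporter(f)
    exporter.start_exporting()                # writes the opening bracket
    exporter.export_item({"url": "http://example.com", "label": "reviewed"})
    exporter.finish_exporting()               # writes the closing bracket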