Python Classifier示例，classification.classifier.Classifier Python示例

示例#1

0

显示文件

    def __init__(
            self,
            dataset,
            estimators: [(str, BaseEstimator)],
            logger=None,
            voting='hard',
            weights=None,
            *args,
            **kwargs):
        """Wrap a ``VotingClassifier`` built from the given estimators.

        Parameters
        ----------
        dataset
            Passed through to ``Classifier.__init__``.
        estimators : list of (str, estimator) tuples
            Named estimators handed to ``VotingClassifier``.
        logger : optional (default=None)
            Passed through to ``Classifier.__init__``.
        voting : str, {'hard', 'soft'} (default='hard')
            If 'hard', uses predicted class labels for majority rule voting.
            Else if 'soft', predicts the class label based on the argmax of
            the sums of the predicted probabilities, which is recommended for
            an ensemble of well-calibrated classifiers.
        weights : array-like, shape = [n_classifiers], optional (default=None)
            Sequence of weights (float or int) to weight the occurrences of
            predicted class labels (hard voting) or class probabilities
            before averaging (soft voting). Uses uniform weights if None.
        *args, **kwargs
            Accepted by the signature but not used by this constructor.
        """
        # Collect the ensemble options first so the construction reads as a
        # single call.
        ensemble_options = {'voting': voting, 'weights': weights}
        self.classifier = VotingClassifier(estimators, **ensemble_options)

        # Both bases are initialised explicitly: the sklearn wrapper receives
        # the estimator, the generic classifier receives dataset and logger.
        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger)

示例#2

0

显示文件

    def __init__(self,
                 dataset: "DataSet",
                 n_estimators=500,
                 max_leaf_nodes=16,
                 verbose=0,
                 model: RandomForestClassifier = None,
                 logger=None):
        """Set up a random-forest classifier and its tuning configuration.

        Parameters
        ----------
        dataset : DataSet
            Passed through to ``Classifier.__init__``.
        n_estimators, max_leaf_nodes, verbose
            Currently not read by this constructor; the default forest is
            built from the fixed settings below.
        model : RandomForestClassifier, optional
            Ready-made estimator to wrap. When ``None`` a default forest is
            created.
        logger : optional
            Passed through to ``Classifier.__init__``.
        """
        # Scoring names and parameter grid stored for later tuning.
        self.scores = [
            'recall_weighted', 'precision_micro', 'precision_weighted'
        ]
        self.tuned_parameters = {
            'criterion': ['gini', 'entropy'],
            'class_weight': ['balanced', None],
            'n_estimators': [10, 30, 50, 100, 150, 200],
            'max_depth': [2, 3, 4, 5, 6, 7, None],
            'bootstrap': [True, False]
        }
        # Identity check: ``None`` is a singleton, and ``==`` would dispatch
        # to whatever ``__eq__`` the supplied model defines.
        if model is None:
            self.classifier = RandomForestClassifier(n_estimators=150,
                                                     max_depth=5,
                                                     criterion='gini',
                                                     class_weight=None,
                                                     bootstrap=True)
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)

示例#3

0

显示文件

    def __init__(self,
                 dataset: "DataSet",
                 verbose=0,
                 model: SGDClassifier = None,
                 logger=None):
        """Set up an SGD classifier and its tuning configuration.

        Parameters
        ----------
        dataset : DataSet
            Passed through to ``Classifier.__init__``.
        verbose
            Currently not read by this constructor.
        model : SGDClassifier, optional
            Ready-made estimator to wrap. When ``None`` a default
            ``SGDClassifier()`` is created.
        logger : optional
            Passed through to ``Classifier.__init__``.
        """
        self.scores = ['precision_weighted']
        # Grid with step-prefixed keys ('vect__', 'tfidf__', 'clf__');
        # presumably these match step names of a pipeline built elsewhere -
        # TODO confirm against the caller.
        self.tuned_parameters_tf = {
            'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
            'tfidf__use_idf': [True, False],
            'tfidf__smooth_idf': [True, False],
            'tfidf__sublinear_tf': [True, False],
            'clf__loss': ['hinge', 'modified_huber'],
            'clf__penalty': ['none', 'l1', 'l2', 'elasticnet'],
            'clf__class_weight': ['balanced', None],
            'clf__fit_intercept': [True, False]
        }
        # Grid for the bare estimator (same classifier options, no prefixes).
        self.tuned_parameters = {
            'loss': ['hinge', 'modified_huber'],
            'penalty': ['none', 'l1', 'l2', 'elasticnet'],
            'class_weight': ['balanced', None],
            'fit_intercept': [True, False]
        }
        # Identity check: ``None`` is a singleton, and ``==`` would dispatch
        # to whatever ``__eq__`` the supplied model defines.
        if model is None:
            self.classifier = SGDClassifier()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)

示例#4

0

显示文件

    def __init__(self, dataset: "DataSet", loss='squared_hinge', max_iter=1000, verbose=0, model=None,
                 logger: "Logger" = None):
        """Set up an SVC classifier and its tuning configuration.

        Parameters
        ----------
        dataset : DataSet
            Passed through to ``Classifier.__init__``.
        loss, max_iter, verbose
            Currently not read by this constructor.
        model : optional
            Ready-made estimator to wrap. When ``None`` a default ``SVC()``
            is created.
        logger : Logger, optional
            Passed through to ``Classifier.__init__``.
        """
        self.scores = ['precision_weighted']
        self.tuned_parameters = {'C': [1, 10, 100, 1000], 'gamma': [1e-3, 1e-4, 'auto'],
                                 'kernel': ['rbf', 'linear', 'poly'], 'class_weight': ['balanced', None],
                                 'degree': [3, 4, 5]}
        # Identity check: ``None`` is a singleton, and ``==`` would dispatch
        # to whatever ``__eq__`` the supplied model defines.
        if model is None:
            self.classifier = SVC()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)

示例#5

0

显示文件

文件： multinomial_naive_bayes.py 项目： swordbreaker/NLP

    def __init__(self, dataset: "DataSet", verbose=0, model: MultinomialNB = None, logger=None):
        """Set up a multinomial naive Bayes classifier and its tuning grid.

        Parameters
        ----------
        dataset : DataSet
            Passed through to ``Classifier.__init__``.
        verbose
            Currently not read by this constructor.
        model : MultinomialNB, optional
            Ready-made estimator to wrap. When ``None`` a default
            ``MultinomialNB()`` is created.
        logger : optional
            Passed through to ``Classifier.__init__``.
        """
        self.scores = ['precision_weighted']
        # Keys carry 'vect__' / 'tfidf__' / 'clf__' prefixes; presumably they
        # match step names of a pipeline built elsewhere - TODO confirm.
        self.tuned_parameters = {'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
                                 'tfidf__use_idf': [True, False],
                                 'tfidf__smooth_idf': [True, False],
                                 'tfidf__sublinear_tf': [True, False],
                                 'clf__alpha': [1e-2, 1e-3, 1e-4],
                                 'clf__fit_prior': [True, False]}
        # Identity check: ``None`` is a singleton, and ``==`` would dispatch
        # to whatever ``__eq__`` the supplied model defines.
        if model is None:
            self.classifier = MultinomialNB()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)

示例#6

0

显示文件

    def __init__(self,
                 dataset: "DataSet",
                 n_estimators=120,
                 verbose=0,
                 model=None,
                 logger: "Logger" = None):
        """Set up a gradient-boosting classifier and its tuning configuration.

        Parameters
        ----------
        dataset : DataSet
            Passed through to ``Classifier.__init__``.
        n_estimators, verbose
            Currently not read by this constructor; the default model is
            built from the fixed settings below.
        model : optional
            Ready-made estimator to wrap. When ``None`` a default
            ``GradientBoostingClassifier`` is created.
        logger : Logger, optional
            Passed through to ``Classifier.__init__``.
        """
        # Scoring names and parameter grid stored for later tuning.
        self.scores = ['recall_weighted', 'precision_weighted']
        self.tuned_parameters = {
            'loss': ['deviance'],
            'learning_rate': [0.3, 0.1, 0.03, 0.01, 0.003, 0.001],
            'n_estimators': [10, 30, 50, 100, 150, 200],
            'max_depth': [2, 3, 4, 5, 6, 7, None]
        }
        # Identity check: ``None`` is a singleton, and ``==`` would dispatch
        # to whatever ``__eq__`` the supplied model defines.
        if model is None:
            self.classifier = GradientBoostingClassifier(n_estimators=100,
                                                         max_depth=5,
                                                         loss='deviance',
                                                         learning_rate=0.1)
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)

示例#7

0

显示文件

文件： custom_classifier.py 项目： DelphianCalamity/text-mining

    def populate_features(self):
        """Assemble the pipeline with a soft-voting ensemble as final step.

        Takes the task list returned by ``Classifier.populate_features``,
        appends a ``VotingClassifier`` step named 'clf' and stores the
        resulting ``Pipeline`` on ``self.pipeline``.
        """
        steps = Classifier.populate_features(self)

        # Members of the ensemble, created in the order lr -> rf -> svm.
        members = [
            ('lr', LogisticRegression(solver='lbfgs',
                                      multi_class='multinomial',
                                      max_iter=100,
                                      random_state=1)),
            ('rf', RandomForestClassifier(n_estimators=100,
                                          criterion='entropy')),
            # probability=True is set on the SVC that feeds the soft vote.
            ('svm', SVC(kernel='linear', probability=True)),
        ]
        ensemble = VotingClassifier(estimators=members,
                                    voting='soft',
                                    weights=[4, 2, 5])

        steps.append(('clf', ensemble))
        self.pipeline = Pipeline(steps)

示例#8

0

显示文件

文件： main.py 项目： manisero/SemViii

# Script entry point: generate a configuration from the classified input,
# classify the unclassified input, write the result and print a summary.
if __name__ == "__main__":

    file_handler = FileHandler()

    # NOTE(review): `configuration_file` is not defined in this section; it
    # must be provided elsewhere in the module.
    configuration_provider = ConfigurationProvider(configuration_file)
    tree_browser = HTMLTreeBrowser()

    # Collaborators shared by the configuration generator and the classifier.
    specification_registry = SpecificationRegistry(configuration_provider, tree_browser)
    content_downloader = WebPageContentDownloader()
    tree_builder = HTMLTreeBuilder()

    configuration_generator = ConfigurationGenerator(configuration_provider,
                                                     specification_registry,
                                                     content_downloader,
                                                     tree_builder)

    # Step 1: generate the configuration from the already-classified URL map.
    url_map = file_handler.get_url_map(configuration_provider.get_classified_input_file_name())

    configuration_generator.generate_configuration(url_map)

    # Step 2: load the URL map that still has to be classified.
    url_map_to_classify = file_handler.get_url_map(configuration_provider.get_unclassified_input_file_name())

    classifier = Classifier(configuration_provider, specification_registry, content_downloader, tree_builder)

    # Only the keys of the map are handed to the classifier.
    classification = classifier.classify(url_map_to_classify.keys())

    # Step 3: persist the classification and print a summary of it against
    # the map that was loaded for classification.
    file_handler.write_classification(configuration_provider.get_output_file_name(), classification)

    SummaryPrinter().print_summary(classification, url_map_to_classify)

示例#9

0

显示文件

 def _classify(self):
     """Classify the preprocessed contents and record topic plurality.

     Builds a ``Classifier`` from ``self.preprocessed_contents``, the page
     limit and the website title, runs it, and stores the result of
     ``is_webpage_topic_plural()`` on ``self.webpage_topic_is_plural``.
     """
     contents = self.preprocessed_contents
     options = {
         'max_pages': self.max_pages,
         'webpage_title': self._website.get_title(),
     }
     topic_classifier = Classifier(contents, **options)
     topic_classifier.classify()

     is_plural = topic_classifier.is_webpage_topic_plural()
     self.webpage_topic_is_plural = is_plural

示例#10

0

显示文件

文件： custom_classifier.py 项目： DelphianCalamity/text-mining

 def __init__(self, path, train_df, test_df, features):
     """Initialise by forwarding every argument to ``Classifier.__init__``."""
     Classifier.__init__(
         self,
         path,
         train_df,
         test_df,
         features,
     )

示例#11

0

显示文件

文件： main.py 项目： samhendrickx/sensing-ms-device

    mobileUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "mobileData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    sensorUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "sensorData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    featuresNames = ["heartrate", "temperature", "steps", "activity"]
    allFeaturesNames = [
        "heartrate_std", "heartrate_max", "heartrate_avg", "heartrate_min",
        "temperature_std", "temperature_max", "temperature_avg", "temperature_min",
        "steps", "activity_score", "activity_minutes"

    ]
    mobileData = getData(mobileUrl)
    sensorData = getData(sensorUrl)
    groups = groupData(mobileData, sensorData)
    features, labels = extractFeatures(groups, featuresNames)
    clf = Classifier()
    clf.train(features, labels)
    rules = clf.getDangerousRules(allFeaturesNames)
    print rules
    maxSeconds = 1
    seconds = maxSeconds
    previousSensorData = None
    while True:
        if seconds > 0:
            print "\nWaiting for "+str(seconds)+" seconds."
            sleep(1)
            seconds -= 1
        else:
            print "\nStarting again..."
            latestSensorData = getLatestSensorData(sensorUrl)
            if previousSensorData is None or latestSensorData["datetime"] != previousSensorData["datetime"]:

示例#12

0

显示文件

from classification.classifier import Classifier
from classification.text_processor import TextProcessor
from preprocess.stemmer import Stemmer
from preprocess.tokenizer import Tokenizer

# Build the classifier once; it is reused for every line of input.
text_processor = TextProcessor(Stemmer(), Tokenizer())
classifier = Classifier(text_processor)

# Interactive loop: classify each line read from stdin and print the result.
while True:
    try:
        user_input = input()
    except EOFError:
        # input() raises EOFError once stdin is exhausted (piped input or
        # Ctrl-D); stop cleanly instead of ending with a traceback.
        break
    print(classifier.classify(user_input))

示例#13

0

显示文件

 def populate_features(self):
     """Assemble the pipeline with a random forest as the final step.

     Takes the task list returned by ``Classifier.populate_features``,
     appends a ``RandomForestClassifier`` step named 'clf' and stores the
     resulting ``Pipeline`` on ``self.pipeline``.
     """
     steps = Classifier.populate_features(self)
     forest = RandomForestClassifier(n_estimators=100, criterion='entropy')
     steps.append(('clf', forest))
     self.pipeline = Pipeline(steps)

示例#14

0

显示文件

文件： main.py 项目： samhendrickx/sensing-ms-device

if __name__ == "__main__":
    mobileUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "mobileData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    sensorUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "sensorData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    featuresNames = ["heartrate", "temperature", "steps", "activity"]
    allFeaturesNames = [
        "heartrate_std", "heartrate_max", "heartrate_avg", "heartrate_min",
        "temperature_std", "temperature_max", "temperature_avg",
        "temperature_min", "steps", "activity_score", "activity_minutes"
    ]
    mobileData = getData(mobileUrl)
    sensorData = getData(sensorUrl)
    groups = groupData(mobileData, sensorData)
    features, labels = extractFeatures(groups, featuresNames)
    clf = Classifier()
    clf.train(features, labels)
    rules = clf.getDangerousRules(allFeaturesNames)
    print rules
    maxSeconds = 1
    seconds = maxSeconds
    previousSensorData = None
    while True:
        if seconds > 0:
            print "\nWaiting for " + str(seconds) + " seconds."
            sleep(1)
            seconds -= 1
        else:
            print "\nStarting again..."
            latestSensorData = getLatestSensorData(sensorUrl)
            if previousSensorData is None or latestSensorData[

示例#15

0

显示文件

from statistics.analyzer import Analyzer
from classification.classifier import Classifier

# Every backslash in the Windows path is escaped explicitly; the previous
# spelling relied on the invalid escape sequence "\R", which Python only
# tolerates with a warning. The resulting string value is unchanged.
file_path = 'C:\\Users\\Roman\\Documents\\lab02\\statistics\\default_data.csv'

analyzer = Analyzer(file_path)

# Run the analyzer before reading the CSV tables below; presumably execute()
# produces them - TODO confirm against Analyzer.
analyzer.execute()

statistics_dictionary = analyzer.read_csv_table('statistics.csv')
freq_statistics = analyzer.read_csv_table('statistics_dictionary.csv')
statistics_unnormal = analyzer.read_csv_table(
    'statistics_dictionary_unnormal_probability.csv')
# NOTE: `classifier` is bound to the return value of
# calc_normal_probability(), not to the Classifier instance itself.
classifier = Classifier(statistics_dictionary, statistics_unnormal,
                        freq_statistics).calc_normal_probability()

示例#16

0

显示文件

        for i in range(len(hp.lr_scheduler_step_size)):
            hp.lr_scheduler_step_size[i] = int(hp.lr_scheduler_step_size[i])
    elif hp.optimizer == 'adam' or hp.optimizer == 'sgdr' or hp.optimizer == 'adamw':
        hp.lr_scheduler_step_size = int(hp.lr_scheduler_step_size)

    hp.manual_seed = random.randint(1, 10000)  # fix seed
    random.seed(hp.manual_seed)
    np.random.seed(hp.manual_seed)
    torch.manual_seed(hp.manual_seed)

    if hp.name == 'modelnet':
        hp.num_class = 40
        if not os.path.exists("modelnet_log"):
            os.makedirs("modelnet_log")
        hp.name = os.path.join("modelnet_log", hp.name + hp.index)
        trainer = Classifier(hp)
    elif hp.name == 'modelnet10':
        hp.num_class = 10
        if not os.path.exists("modelnet10_log"):
            os.makedirs("modelnet10_log")
        hp.name = os.path.join("modelnet10_log", hp.name + hp.index)
        trainer = Classifier10(hp)
    elif hp.name == 'part_shapenet':
        if not os.path.exists("part_shapenet_log"):
            os.makedirs("part_shapenet_log")
        hp.name = os.path.join("part_shapenet_log", hp.name + hp.index)
        trainer = PartSegmentor(hp)
    elif hp.name == 's3dis':
        hp.num_class = 13
        if not os.path.exists("s3dis_log"):
            os.makedirs("s3dis_log")

示例#17

0

显示文件

 def populate_features(self):
     """Assemble the pipeline with a linear SVM as the final step.

     Takes the task list returned by ``Classifier.populate_features``,
     appends a ``LinearSVC`` step named 'clf' and stores the resulting
     ``Pipeline`` on ``self.pipeline``.
     """
     steps = Classifier.populate_features(self)
     svm_step = ('clf', LinearSVC())
     steps.append(svm_step)
     self.pipeline = Pipeline(steps)