Exemplos de Classifier.Classifier em Python

Linguagem de programação: Python

Namespace / nome do pacote: classes.Classifier

Classe / Tipo: Classifier

Método / Função: Classifier

Exemplos em hotexamples.com: 2

Classifier.Classifier em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de classes.Classifier.Classifier.Classifier em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir / Ocultar

Classifier(2)

createVectSpaceCategory(1)

createVectSpacePost(1)

createVectSpaceSubcategory(1)

getModel(1)

get_classifier_list(1)

get_stacking(1)

trainModel(1)

Métodos Frequentes

Classifier (2)

createVectSpaceCategory (1)

createVectSpacePost (1)

createVectSpaceSubcategory (1)

getModel (1)

get_classifier_list (1)

get_stacking (1)

trainModel (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: train_classifier.py Projeto: janes/experiment_ic_productsimilarity

def _accuracy(predictionAndLabelRDD):
    """Return the fraction of pairs in the RDD whose two elements are equal.

    Each element of *predictionAndLabelRDD* is a 2-item pair; the pair counts
    as a hit only when both items compare equal as whole values.

    Raises ZeroDivisionError when the RDD is empty.
    """
    hits = predictionAndLabelRDD.filter(lambda pair: pair[0] == pair[1]).count()
    return float(hits) / float(predictionAndLabelRDD.count())


def main(sc):
    """Train and evaluate category/subcategory classifiers on product text.

    Builds a TF-IDF representation of the products returned by
    findProductsByCategory, trains Naive Bayes models for category and
    subcategory plus a Decision Tree model for category, and prints the
    accuracy of each model on a held-out split and the elapsed time.

    NOTE: this function targets Python 2 (it relies on xrange and unichr).

    Args:
        sc: context object used to parallelize the products, broadcast the
            IDF map and construct the classifiers (presumably a
            SparkContext -- confirm against the caller).
    """
    start = timer()
    # "Musical Instruments" is currently disabled.
    categs = ["Computers & Tablets", "Video Games", "TV & Home Theater"]

    # A set gives O(1) membership tests when filtering tokens below.
    stpwrds = set(stopwords.words('english'))

    # Translation table mapping every code point whose Unicode category
    # starts with S, P or N to None, so translate() deletes those characters.
    tbl_translate = dict.fromkeys(
        i for i in xrange(sys.maxunicode)
        if unicodedata.category(unichr(i)).startswith(('S', 'P', 'N')))

    # Records are indexed as s[1] = text, s[2] = category, s[3] = subcategory;
    # s[0] is carried through untouched (presumably a product id -- confirm).
    productRDD = sc.parallelize(findProductsByCategory(categs))
    corpusRDD = (
        productRDD
        # Strip the characters in tbl_translate, lowercase, tokenize.
        .map(lambda s: (s[0], word_tokenize(s[1].translate(tbl_translate).lower()), s[2], s[3]))
        # Drop stop words and stem the remaining tokens.
        .map(lambda s: (s[0], [PorterStemmer().stem(x) for x in s[1] if x not in stpwrds], s[2], s[3]))
        # Keep only tokens tagged NN or NNP by pos_tag.
        .map(lambda s: (s[0], [x[0] for x in pos_tag(s[1]) if x[1] == 'NN' or x[1] == 'NNP'], s[2], s[3]))
        .cache())

    idfsRDD = idfs(corpusRDD)
    idfsRDDBroadcast = sc.broadcast(idfsRDD.collectAsMap())
    tfidfRDD = corpusRDD.map(lambda x: (x[0], tfidf(x[1], idfsRDDBroadcast.value), x[2], x[3]))

    # Positions in these lists are used to turn numeric labels and
    # predictions back into category / (category, subcategory) values.
    category = productRDD.map(lambda x: x[2]).distinct().collect()
    categoryAndSubcategory = productRDD.map(lambda x: (x[2], x[3])).distinct().collect()
    tokens = corpusRDD.flatMap(lambda x: x[1]).distinct().collect()
    insertTokensAndCategories(tokens, category, categoryAndSubcategory)

    # --- Naive Bayes, category level ---
    classifier = Classifier(sc, 'NaiveBayes')
    trainingVectSpaceCategoryRDD, testVectSpaceCategoryRDD = (
        classifier.createVectSpaceCategory(tfidfRDD, category, tokens)
        .randomSplit([8, 2], seed=0))
    modelNaiveBayesCategory = classifier.trainModel(
        trainingVectSpaceCategoryRDD, '/dados/models/naivebayes/category_new')
    predictionAndLabelCategoryRDD = testVectSpaceCategoryRDD.map(
        lambda p: (category[int(modelNaiveBayesCategory.predict(p.features))],
                   category[int(p.label)]))
    # Compare whole category names; comparing only element [0] of each
    # string would match any two categories sharing a first character.
    accuracyCategory = _accuracy(predictionAndLabelCategoryRDD)
    print('the accuracy of the Category Naive Bayes model is %f' % accuracyCategory)

    # --- Naive Bayes, subcategory level ---
    # Training in this second way just for test.
    trainingVectSpaceSubcategory, testVectSpaceSubcategory = (
        classifier.createVectSpaceSubcategory(tfidfRDD, categoryAndSubcategory, tokens)
        .randomSplit([8, 2], seed=0))
    modelNaiveBayesSubcategory = classifier.trainModel(
        trainingVectSpaceSubcategory, '/dados/models/naivebayes/subcategory_new')
    predictionAndLabelSubcategory = testVectSpaceSubcategory.map(
        lambda p: (categoryAndSubcategory[int(modelNaiveBayesSubcategory.predict(p.features))],
                   categoryAndSubcategory[int(p.label)]))
    # Compare the full (category, subcategory) tuples; comparing only
    # element [0] would score a wrong subcategory as correct whenever the
    # category part matched.
    accuracySubcategory = _accuracy(predictionAndLabelSubcategory)
    print('the accuracy of the Subcategory Naive Bayes model is %f' % accuracySubcategory)

    # --- Decision Tree, category level ---
    classifierDT = Classifier(sc, 'DecisionTree')
    trainingVectSpaceCategory, testVectSpaceCategory = (
        classifierDT.createVectSpaceCategory(tfidfRDD, category, tokens)
        .randomSplit([8, 2], seed=0))
    modelDecisionTreeCategory = classifierDT.trainModel(
        trainingVectSpaceCategory, '/dados/models/dt/category_new')
    # Predict on the whole feature RDD at once, then pair each true label
    # with its prediction via zip.
    predictions = modelDecisionTreeCategory.predict(
        testVectSpaceCategory.map(lambda x: x.features))
    predictionAndLabelCategory = testVectSpaceCategory.map(lambda lp: lp.label).zip(predictions)
    accuracyDecisionTree = _accuracy(predictionAndLabelCategory)
    print('the accuracy of the Decision Tree model is %f' % accuracyDecisionTree)

    elap = timer() - start
    print('it tooks %d seconds' % elap)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: Santander-Train Ensemble.py Projeto: joseferrercba/FundacionsadoskySantander

import mlflow
import pandas as pd
from sklearn.utils import shuffle
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC, SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, StackingClassifier

from classes.Vectorizer import Vectorizer
from classes.Classifier import Classifier
from classes.Resample import Resample
from classes.ModelBuilder import ModelBuilder
from classes.Constans import *

# In[ ]:

# Project helper objects, plus an (initially empty) list of classifiers.
classifier = Classifier()
vectorizer = Vectorizer()
resample = Resample()
builder = ModelBuilder()
classifier_list = []

# ### Get Info from CSV

# In[ ]:

# Training data: pipe-separated CSV, rows shuffled after loading.
df_train = pd.read_csv('data/train_preprocessed.csv', sep='|')
df_train = shuffle(df_train)

# Test data: only the 'id' and 'Pregunta' columns are read, then shuffled.
df_test = pd.read_csv('data/test_santander.csv', usecols=['id', 'Pregunta'])
df_test = shuffle(df_test)

# Show how many rows each 'Intencion_cat_label' value has.
print(df_train['Intencion_cat_label'].value_counts())

# Add one more sample: one class has just a single sample, and stratified
# splitting needs at least 2 samples per class.
# NOTE(review): the meaning of the arguments 5 and 100 is defined by
# Resample.apply_resample, which is not visible here -- confirm.
df_train = resample.apply_resample(df_train, 'Pregunta', 5, 100)