def show_coefficients(classifier, clf, feature_names, filename, top_features=20):
    """Print the top-ranked feature names paired with their scores.

    Parameters
    ----------
    classifier : str
        One of "svml" (linear SVM, uses ``coef_``), "rf" (random forest) or
        "dtree" (decision tree, also dumps a Graphviz ``.dot`` file); any
        other value is a silent no-op.
    clf : fitted estimator
        Must expose ``coef_`` for "svml" or ``feature_importances_`` for the
        tree-based options.
    feature_names : sequence of str
        Column names aligned with the coefficient vector.
    filename : str
        Base name for the ``.dot`` export (used for "dtree" only).
    top_features : int, optional
        Number of top-ranked features to print (default 20).
    """
    if classifier == "svml":
        coef = clf.coef_.ravel()
    elif classifier == "rf":
        coef = clf.feature_importances_
    elif classifier == "dtree":
        # Also dump the tree structure for later rendering with Graphviz.
        export_graphviz(clf, out_file=(filename + '.dot'),
                        feature_names=feature_names)
        coef = clf.feature_importances_
    else:
        return
    # Indices of the `top_features` largest coefficients, best first.
    top_positive_coefficients = np.argsort(coef)[-top_features:][::-1]
    feature_names = np.array(feature_names)
    # Fix: pair each top feature with its own score directly.  The original
    # zipped against the globally sorted coefficient list through a no-op
    # `map(lambda x: x, ...)` -- same values, but the pairing was obscured.
    print(list(zip(feature_names[top_positive_coefficients],
                   coef[top_positive_coefficients])))
def export_tree(*data):
    """Fit a decision tree on the training split and dump it as a Graphviz
    dot file.

    :param data: 4-tuple ``(X_train, X_test, y_train, y_test)``; only the
        training pieces are consumed here.
    :return: None
    """
    X_train, X_test, y_train, y_test = data
    tree_clf = DecisionTreeClassifier()
    tree_clf.fit(X_train, y_train)
    # Render the exported file afterwards with the Graphviz `dot` tool, e.g.
    #   dot.exe -Tpdf F:/out -o F:/out.pdf   (PDF)
    #   dot.exe -Tpng F:/out -o F:/out.png   (PNG)
    # where -T selects the output format and -o the output file name.
    export_graphviz(tree_clf, "F:/out")
def trainDecisionTree(X, y, max_depth=16, step_size=1):
    """Cross-validate decision-tree depths in parallel, refit at the best
    depth, render the final tree with Graphviz, and return it.

    One worker thread evaluates each candidate depth via cross-validation;
    every worker writes its mean score into a dedicated slot of a shared
    array, so no locking is required.

    Returns
    -------
    (model, best_depth)
        The refitted ``DecisionTreeClassifier`` and the depth that achieved
        the highest mean CV score.
    """
    depths = range(1, max_depth + 1, step_size)
    scores = np.empty(len(depths))

    def validatorDT(X, y, scores, depth, i):
        # Score one candidate depth and record it in slot `i`.
        model = DecisionTreeClassifier(max_depth=depth, criterion='entropy',
                                       random_state=1)
        scores[i] = cross_val_score(model, X, y, cv=NUM_CV_FOLDS).mean()

    workers = [Thread(target=validatorDT, args=(X, y, scores, d, slot))
               for slot, d in enumerate(depths)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()

    # Refit on the full data at the winning depth.
    best_depth = depths[np.argmax(scores)]
    model = DecisionTreeClassifier(max_depth=best_depth, criterion='entropy',
                                   random_state=1)
    model = model.fit(X, y)

    # Render the final tree to "decisionTree" via Graphviz.
    dot_data = export_graphviz(model, out_file=None)
    graph = graphviz.Source(dot_data)
    graph.render("decisionTree")
    return model, best_depth
def Tree_freture_importance_plot(X, y):
    """Fit a DecisionTreeRegressor on (X, y) and write its tree diagram to
    ``tree.pdf`` via Graphviz/pydotplus.

    Fix: dropped the unused local ``feat_labels`` (``X.columns[:]``) that
    was computed but never read.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix.
    y : array-like
        Regression target.
    """
    regr = DecisionTreeRegressor()
    regr.fit(X, y)
    # Export the fitted tree as dot source, then render it to PDF.
    out = export_graphviz(regr)
    graph = pydotplus.graph_from_dot_data(out)
    graph.write_pdf('tree.pdf')
def convert_decision_tree_to_ipython_image(clf, feature_names=None,
                                           class_names=None,
                                           image_filename=None, tmp_dir=None):
    """Render a fitted decision tree to a PNG and wrap it in an IPython Image.

    Exports `clf` to a temporary ``.dot`` file, shells out to the Graphviz
    ``dot`` binary to rasterize it, and returns the resulting PNG as an
    ``IPython.display.Image``.

    Parameters
    ----------
    clf : fitted tree estimator accepted by ``export_graphviz``.
    feature_names, class_names : passed through to ``export_graphviz``.
    image_filename : str, optional
        Output PNG path; defaults to ``<dotfile>.png`` (left on disk).
    tmp_dir : str, optional
        Directory for the temporary ``.dot`` file.

    Returns
    -------
    IPython.display.Image
    """
    # Fix: mkstemp returns (os-level fd, path); the original kept only the
    # path and leaked the file descriptor.  Close it right away.
    dot_fd, dot_filename = mkstemp(suffix='.dot', dir=tmp_dir)
    os.close(dot_fd)
    try:
        with open(dot_filename, "w") as out_file:
            export_graphviz(clf, out_file=out_file,
                            feature_names=feature_names,
                            class_names=class_names,
                            filled=True, rounded=True,
                            special_characters=True)
        from IPython.display import Image
        image_filename = image_filename or ('%s.png' % dot_filename)
        # Shell out to Graphviz `dot` to rasterize the exported tree.
        subprocess.call(('dot -Tpng -o %s %s' %
                         (image_filename, dot_filename)).split(' '))
        return Image(filename=image_filename)
    finally:
        # Fix: clean up the temporary .dot file even when export/rendering
        # raises (the original only removed it on the success path).
        os.remove(dot_filename)
def show_coefficients(classifier, clf, feature_names, top_features=20):
    """Print the top-ranked feature names paired with their scores.

    Parameters
    ----------
    classifier : str
        "svml" (linear SVM, uses ``coef_``), "rf" (random forest, possibly
        wrapped in a Pipeline), "dt" (decision tree, also dumps
        ``tree.dot``) or "xgb"; any other value is a silent no-op.
    clf : fitted estimator (or Pipeline whose final step is named 'clf').
    feature_names : sequence of str aligned with the coefficient vector.
    top_features : int, optional
        Number of top-ranked features to print (default 20).
    """
    if classifier == "svml":
        coef = clf.coef_.ravel()
    elif classifier == "rf":
        # Random-forest models may arrive wrapped in a sklearn Pipeline;
        # unwrap the final estimator first.
        if isinstance(clf, Pipeline):
            clf = clf.named_steps['clf']
        coef = clf.feature_importances_
    elif classifier == "dt":
        # Also dump the tree structure for later rendering with Graphviz.
        export_graphviz(clf, out_file='tree.dot', feature_names=feature_names)
        coef = clf.feature_importances_
    elif classifier == 'xgb':
        coef = clf.feature_importances_
    else:
        return
    # Indices of the `top_features` largest coefficients, best first.
    top_positive_coefficients = np.argsort(coef)[-top_features:][::-1]
    feature_names = np.array(feature_names)
    # Fix: pair each top feature with its own score directly.  The original
    # zipped against the globally sorted coefficient list through a no-op
    # `map(lambda x: x, ...)` -- same values, but the pairing was obscured.
    print(list(zip(feature_names[top_positive_coefficients],
                   coef[top_positive_coefficients])))
def dt_run_plot(importance_matirx, group_feature, result, fea_number, names=""):
    """Fit a shallow decision tree on the top-ranked features and write its
    diagram to a PDF.

    Parameters
    ----------
    importance_matirx : pandas.DataFrame
        Per-run feature importances; features are ranked by column mean.
        (Parameter name typo kept for caller compatibility.)
    group_feature : pandas.DataFrame
        Full feature matrix; only the top-ranked columns are used.
    result : array-like
        Target labels for fitting.
    fea_number : int
        How many top features to keep.
    names : str, optional
        Suffix for the output PDF file name.  Fix: the original referenced
        an undefined global ``names`` and raised NameError at run time;
        it is now an explicit, backward-compatible parameter.
    """
    # Rank features by mean importance and keep the `fea_number` best.
    features_top = importance_matirx.mean().sort_values(
        ascending=False)[:fea_number].index
    feature_matrix = group_feature[features_top]
    cla = DecisionTreeClassifier(max_depth=4, max_features=None,
                                 max_leaf_nodes=None, min_samples_leaf=0.05)
    cla = cla.fit(feature_matrix, result)
    # Export the tree into an in-memory dot buffer, then render it.
    # (Fix: the original bound export_graphviz's None return to an unused
    # local `out`.)
    dot_data = StringIO()
    export_graphviz(cla, out_file=dot_data, filled=True, rounded=True,
                    impurity=True, special_characters=True, proportion=True,
                    feature_names=features_top)
    graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
    Image(graph.create_png())  # inline display when run in a notebook
    graph.write_pdf(
        r"C:\Users\s1883483\Desktop\2018 Rotman Datathon\output\dt_"
        + names + ".pdf")
# Label-encode the remaining categorical predictor columns.
previsores[:, 2] = labelencoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelencoder.fit_transform(previsores[:, 3])

from sklearn.tree import DecisionTreeClassifier, export

classificador = DecisionTreeClassifier(criterion="entropy")

# Train the algorithm - build the decision tree.
classificador.fit(previsores, classe)

# Print the relative importance of each attribute.
print(classificador.feature_importances_)

# Export the fitted tree to a Graphviz .dot file for visualisation.
export.export_graphviz(classificador,
                       out_file="arvore.dot",
                       feature_names=[
                           "historia",
                           "divida",
                           "garantia",
                           "renda",
                       ],
                       class_names=["alto", "moderado", "baixo"],
                       filled=True,
                       leaves_parallel=True)

# Sample 1: good history, high debt, no collateral, income > 35
# Sample 2: bad history, high debt, adequate collateral, income < 15
# predict = run the trained classifier on the encoded samples.
resultado = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]])
print(resultado)
#%%[markdown] # # Árvores de Decisão # O classificador por árvores de decisão funciona com base em teoria da informação. # Primeiramente, estima-se o ganho de informacao por cada variavel preditora #%% # Algoritmos de arvores de decisao para prever dados # classificador por arvores de decisao from sklearn.tree import DecisionTreeClassifier, export from db_loaders.risco_credito import previsores, risco # Classificador por arvores de decisao modelo = DecisionTreeClassifier(criterion='entropy') modelo.fit(previsores, risco) export.export_graphviz( modelo, out_file="assets/arvore.dot", feature_names=["Histórico", "Dívida", "Garantias", "Renda"], class_names=["Alto", "Moderado", "Baixo"], filled=True, leaves_parallel=True) resultado = modelo.predict([[0, 0, 1, 2], [2, 0, 0, 0]]) #%%
def _decision_tree_regression_train(table, feature_cols, label_col,
                                    # fig_size=np.array([6.4, 4.8]),
                                    criterion='mse',
                                    splitter='best',
                                    max_depth=None,
                                    min_samples_split=2,
                                    min_samples_leaf=1,
                                    min_weight_fraction_leaf=0.0,
                                    max_features=None,
                                    random_state=None,
                                    max_leaf_nodes=None,
                                    min_impurity_decrease=0.0,
                                    min_impurity_split=None,
                                    presort=False,
                                    sample_weight=None,
                                    check_input=True,
                                    X_idx_sorted=None):
    """Train a DecisionTreeRegressor on ``table[feature_cols]`` against
    ``table[label_col]`` and return ``{'model': model_dict}``.

    The model dict carries the fitted regressor, its hyper-parameters,
    feature importances, and a Markdown report (tree diagram + importance
    chart) attached under ``'_repr_brtc_'``.

    NOTE(review): hyper-parameters are forwarded to sklearn's
    DecisionTreeRegressor *positionally*, so this code is tied to the
    sklearn release whose constructor has exactly this argument order --
    confirm against the pinned sklearn version.
    """
    # Reject out-of-range hyper-parameters before fitting.
    param_validation_check = [
        greater_than_or_equal_to(min_samples_split, 2, 'min_samples_split'),
        greater_than_or_equal_to(min_samples_leaf, 1, 'min_samples_leaf'),
        greater_than_or_equal_to(min_weight_fraction_leaf, 0.0,
                                 'min_weight_fraction_leaf')
    ]
    if max_depth is not None:
        param_validation_check.append(
            greater_than_or_equal_to(max_depth, 1, 'max_depth'))
    validate(*param_validation_check)

    regressor = DecisionTreeRegressor(criterion, splitter, max_depth,
                                      min_samples_split, min_samples_leaf,
                                      min_weight_fraction_leaf, max_features,
                                      random_state, max_leaf_nodes,
                                      min_impurity_decrease,
                                      min_impurity_split, presort)
    regressor.fit(table[feature_cols], table[label_col], sample_weight,
                  check_input, X_idx_sorted)

    # Render the fitted tree with Graphviz; degrade to a help message when
    # Graphviz / pydotplus is unavailable.
    try:
        from sklearn.externals.six import StringIO
        from sklearn.tree import export_graphviz
        import pydotplus
        dot_data = StringIO()
        export_graphviz(regressor,
                        out_file=dot_data,
                        feature_names=feature_cols,
                        filled=True,
                        rounded=True,
                        special_characters=True)
        graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
        from brightics.common.repr import png2MD
        fig_tree = png2MD(graph.create_png())
    except:
        # Deliberate broad except: any failure in the optional rendering
        # chain falls back to a plain message instead of aborting training.
        fig_tree = "Graphviz is needed to draw a Decision Tree graph. Please download it from http://graphviz.org/download/ and install it to your computer."

    # json
    model = _model_dict('decision_tree_regression_model')
    model['feature_cols'] = feature_cols
    model['label_col'] = label_col
    feature_importance = regressor.feature_importances_
    model['feature_importance'] = feature_importance
    model['max_features'] = regressor.max_features_
    model['n_features'] = regressor.n_features_
    model['n_outputs'] = regressor.n_outputs_
    model['tree'] = regressor.tree_
    get_param = regressor.get_params()
    model['parameters'] = get_param
    model['regressor'] = regressor

    # report: horizontal bar chart of importances, least important at the
    # bottom (argsort is ascending).
    indices = np.argsort(feature_importance)
    sorted_feature_cols = np.array(feature_cols)[indices]
    plt.title('Feature Importances')
    plt.barh(range(len(indices)), feature_importance[indices], color='b',
             align='center')
    for i, v in enumerate(feature_importance[indices]):
        plt.text(v, i, " {:.2f}".format(v), color='b', va='center',
                 fontweight='bold')
    plt.yticks(range(len(indices)), sorted_feature_cols)
    plt.xlabel('Relative Importance')
    plt.tight_layout()
    fig_feature_importances = plt2MD(plt)
    plt.clf()
    params = dict2MD(get_param)
    # NOTE(review): feature_importance_df is built but never used below --
    # confirm whether it was meant to go into the report.
    feature_importance_df = pd.DataFrame(data=feature_importance,
                                         index=feature_cols).T

    # Add tree plot
    rb = BrtcReprBuilder()
    rb.addMD(
        strip_margin("""
    | ## Decision Tree Regression Train Result
    | ### Decision Tree
    | {fig_tree}
    |
    | ### Feature Importance
    | {fig_feature_importances}
    |
    | ### Parameters
    | {list_parameters}
    |
    """.format(fig_tree=fig_tree,
               fig_feature_importances=fig_feature_importances,
               list_parameters=params)))
    model['_repr_brtc_'] = rb.get()

    return {'model': model}
devemos mudar a escala dos valores, pois a diferença de um número para o outro é muito alta! """

# Standardize the predictors so all columns share a comparable scale.
scaler = StandardScaler()
# Fit the scaler and apply the transformation in one step.
previsores = scaler.fit_transform(previsores)

# Split the dataset into training (70%) and test (30%) partitions.
previsores_treinamento, previsores_teste, attr_classe_treinamento, attr_classe_teste = train_test_split(
    previsores, attr_classe, test_size=0.3, random_state=0)

clf = DecisionTreeClassifier(criterion="entropy")
clf.fit(previsores_treinamento, attr_classe_treinamento)
resultado = clf.predict(previsores_teste)

# Accuracy as a percentage plus the confusion matrix for the test split.
acuracia = accuracy_score(attr_classe_teste, resultado) * 100
matriz = confusion_matrix(attr_classe_teste, resultado)

### Visualise the decision tree.
export.export_graphviz(
    clf,
    out_file="arvore_decisao.dot",
    feature_names=["income", "age", "loan"],
    class_names=["0", "1"],
    filled=True,
    leaves_parallel=True
)
# The .dot extension is required; feature_names labels the nodes and
# class_names labels the classes *
# * class names must be listed in the order in which they first appear in
#   the dataset.
# Train the decision tree on the training split.
print("Veri seti Eğitiliyor.......\n")
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Raw (unnormalised) per-feature importances from the fitted tree.
feat_importance = clf.tree_.compute_feature_importances(normalize=False)
print("feature importance = " + str(feat_importance))

from sklearn.tree.export import export_graphviz
from sklearn.feature_selection import mutual_info_classif
# Fix: StringIO moved to the `io` module in Python 3; the original
# `from StringIO import StringIO` only works on Python 2.
from io import StringIO

# Write the tree structure to the file named 'test'.
# (Fix: export_graphviz returns None when out_file is given, so the
# original `out = StringIO(); out = export_graphviz(...)` just created and
# discarded dead locals.)
export_graphviz(clf, out_file='test')

# In[14]:

# Fix: the original line `print("Eski Test Veriseti Accuracy: "),
# accuracy_score(y_test, y_pred)` was a 2to3-mangled Python-2 print
# statement: it built a throwaway tuple and never printed the score.
print("Eski Test Veriseti Accuracy: ", accuracy_score(y_test, y_pred))

# In[15]:

# Retrain from scratch (notebook cell repeated verbatim in the original).
print("Veri seti Eğitiliyor.......\n")
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
feat_importance = clf.tree_.compute_feature_importances(normalize=False)
print("feature importance = " + str(feat_importance))
    classLabel_test, predictions)  #will compare predictions with the original classLabel
matrix = confusion_matrix(
    classLabel_test, predictions
)  # confusion matrix -> primary diagonal: correct classifications // other
   # indexes -> misclassifications, e.g. index [0,1] -> true class 0
   # predicted as 1, etc.

#"age , workclass , final-weight , education , education-num , marital-status , occupation , relationship , race , sex , capital-gain , capital-loos , hour-per-week , native-country , income"
#pred1=classificator.predict([[40,4,200000,12,14,7,4,1,2,1,1000,0,50,39]]) #returns class
#"40,Private,200000,Masters,14,divorced,sales,husband,black,male,1000,0,50,united-states"

# Export the fitted tree for visualisation (does not work with
# one-hot-encoded predictors).
export.export_graphviz(
    classificator,
    out_file='censusTree.dot',
    feature_names=[
        'age', 'workclass', 'final-weight', 'education', 'education-num',
        'marital-status', 'occupation', 'relationship', 'race', 'sex',
        'capital-gain', 'capital-loos', 'hour-per-week', 'native-country'
    ],
    class_names='classLabel',
    filled=True,
    leaves_parallel=True)

# PRECISION RESULTS:
# Entropy/Gain
#   With all pre-processing = 81.04 %
#   Without OHE             = 81.28 %
#   Without scaler          = 81.02 %
# Gini
#   With all pre-processing = 81.14 %
#   Without OHE             = 80.06 %
#   Without scaler          = 81.11 %
# Label-encode the remaining categorical predictor columns.
previsores[:, 2] = labelencoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelencoder.fit_transform(previsores[:, 3])
#print(previsores)

# 3) Train the classifier.
classificador = DecisionTreeClassifier(criterion='entropy')
classificador.fit(previsores, classe)
print(classificador.feature_importances_
      )  # Show the importance of each attribute.
print(
    classificador.score(previsores, classe)
)  # Check model quality: the closer to 1 (100%), the better.

# 4) Export the decision-tree graph, to be rendered later with the
#    graphviz tool.
export.export_graphviz(
    classificador,
    out_file=
    'arvore.dot',  # .dot is the extension used by the graphviz tool
    feature_names=['historia', 'divida', 'garantias', 'renda'],
    class_names=classificador.classes_,
    filled=True,
    leaves_parallel=True)

# 5) Classify with the already-trained model, using these samples:
''' história boa, dívida alta, garantias nenhuma, renda > 35 história ruim, dívida alta, garantias adequada, renda < 15 '''
resultado = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]])
print(resultado)
    linewidths=.5, fmt="d")
plt.ylabel("Valores reais das notas médias")
plt.xlabel("Valores previstos das notas médias")
plt.savefig('cmadd.png')
plt.show()

# Analysis of the number of reviews grouped by rating.
y = df.groupby(['Reviews'])['Rating'].mean()
y.sort_values(ascending=False, inplace=True)

# Graph generator for the decision tree.
export.export_graphviz(classificador,
                       out_file='arvore_rating_3n.dot',
                       feature_names=[
                           'Reviews', 'Size', 'Installs', 'Content Rating',
                           'Genre', 'Category'
                       ],
                       class_names=['1', '2', '3', '4', '5'],
                       filled=True,
                       leaves_parallel=True)

# A first run with an unconstrained number of nodes achieved a precision of
# 0.6829643296432965.  The data overfit given the node count, so the node
# count was reduced to n=3: precision = 0.7404674046740467, an improvement
# of 0.057.
# Conclusion: in a 3-node decision tree the number of reviews is highly
# relevant for determining the rating.
# Note: the second most relevant attribute, at n=10, turned out to be
# 'Installs', improving precision by 0.007% (0.7478474784747847).
# Realizando pre-processamento dos previsores que são dados categóricos pois # Decision-Tree não utiliza dados categóricos labelencoder = LabelEncoder() previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0]) previsores[:, 1] = labelencoder.fit_transform(previsores[:, 1]) previsores[:, 2] = labelencoder.fit_transform(previsores[:, 2]) previsores[:, 3] = labelencoder.fit_transform(previsores[:, 3]) # Aplicando o algoritmo de Árvores de Decisão from sklearn.tree import DecisionTreeClassifier, export classificador = DecisionTreeClassifier(criterion='entropy') classificador.fit(previsores, classe) print(classificador.feature_importances_) export.export_graphviz( classificador, out_file= r'C:\Users\allan\Documents\Python_Machine_Learning_Jones_Granatyr\Decision_Tree\arvore.dot', feature_names=['historia', 'divida', 'garantias', 'renda'], class_names=['alto', 'moderado', 'baixo'], filled=True, leaves_parallel=True) # Realizando a classificação com um dado não incluso nos previsores # historico bom, divida alta, garantia nenhuma e renda > 15 # historico ruim, divida alta, garantias adequada, renda < 15 resultado = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]]) print(classificador.classes_)
# Target column is the fifth column of the dataframe.
classe = base.iloc[:, 4].values

# Label-encode the categorical predictor columns.
from sklearn.preprocessing import LabelEncoder

labelencoder = LabelEncoder()
previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0])
previsores[:, 1] = labelencoder.fit_transform(previsores[:, 1])
previsores[:, 2] = labelencoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelencoder.fit_transform(previsores[:, 3])

from sklearn.tree import DecisionTreeClassifier, export

classificador = DecisionTreeClassifier(criterion='entropy')
classificador.fit(previsores, classe)  # Build the decision tree.
print(classificador.feature_importances_
      )  # Show the importance of each attribute.

# Export the fitted tree for visualisation with Graphviz.
export.export_graphviz(
    classificador,
    out_file='arvore.dot',
    feature_names=['Historia', 'Divida', 'Garantias', 'Renda'],
    class_names=['Alto', 'Moderado', 'Baixo'],
    filled=True,
    leaves_parallel=True)

# Good history, high debt, no collateral, income > 35
# Bad history, high debt, adequate collateral, income < 15
resultado = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]])
print(classificador.classes_)
# Fix: removed `print(classificador.class_count_)` and
# `print(classificador.class_prior_)` -- DecisionTreeClassifier has no such
# attributes (they belong to the naive-Bayes estimators), so both lines
# raised AttributeError at run time.
entradas_treinamento, entradas_teste, classe_treinamento, classe_teste = train_test_split( entradas, classe, test_size=0.30, random_state=0) # Instnacioando o classificar da arvores de devisão # Vai gerar uma arrvores de decisão de acordo com o criterio a entropia classificador = DecisionTreeClassifier(criterion='entropy') classificador.fit(entradas_treinamento, classe_treinamento) #print(classificador.feature_importances_) # Visualização da arvore export.export_graphviz(classificador, out_file='arvore.dot', feature_names=[ 'Alternativa', 'Bar', 'Sex/Sab', 'Faminto', 'Clientes', 'Preço', 'Chovendo', 'Reserva', 'Tipo', 'Espera estimada' ], class_names=['Sim', 'Não'], filled=True, leaves_parallel=True) # Usa a base de teste para ver o resultado resultado_teste = classificador.predict(entradas_teste) # Verificando a quantidade de acerto nos testes precisao = accuracy_score(classe_teste, resultado_teste) print(precisao)
# Predictors are the first four columns; the target is the fifth.
previsores = base.iloc[:, 0:4].values
classe = base.iloc[:, 4].values

from sklearn.preprocessing import LabelEncoder

labelEncoder = LabelEncoder()
base.columns
# Label-encode every categorical predictor column.
previsores[:, 0] = labelEncoder.fit_transform(previsores[:, 0])
previsores[:, 1] = labelEncoder.fit_transform(previsores[:, 1])
previsores[:, 2] = labelEncoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelEncoder.fit_transform(previsores[:, 3])

from sklearn.tree import DecisionTreeClassifier, export

classificador = DecisionTreeClassifier(criterion="entropy")
classificador.fit(previsores, classe)
print(classificador.feature_importances_)

# Export the fitted tree for visualisation with Graphviz.
# NOTE(review): the file name 'arvore_dot' is probably a typo for
# 'arvore.dot' (the .dot extension Graphviz expects) -- confirm before
# relying on downstream tooling.
export.export_graphviz(
    classificador,
    out_file='arvore_dot',
    feature_names=['historia', 'divida', 'garantias', 'renda'],
    class_names=classificador.classes_,
    filled=True,
    leaves_parallel=True)

resultado = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]])
print(classificador.classes_)
def _decision_tree_classification_train(table, feature_cols, label_col,
                                        # fig_size=np.array([6.4, 4.8]),
                                        criterion='gini',
                                        splitter='best',
                                        max_depth=None,
                                        min_samples_split=2,
                                        min_samples_leaf=1,
                                        min_weight_fraction_leaf=0.0,
                                        max_features=None,
                                        random_state=None,
                                        max_leaf_nodes=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        class_weight=None,
                                        presort=False,
                                        sample_weight=None,
                                        check_input=True,
                                        X_idx_sorted=None):
    """Train a DecisionTreeClassifier on ``table[feature_cols]`` against
    ``table[label_col]`` and return ``{'model': model_dict}``.

    The model dict carries the fitted classifier, its classes and
    hyper-parameters, feature importances, and a Markdown report (tree
    diagram + importance chart) attached under ``'report'``.

    NOTE(review): hyper-parameters are forwarded to sklearn's
    DecisionTreeClassifier *positionally*, so this code is tied to the
    sklearn release whose constructor has exactly this argument order --
    confirm against the pinned sklearn version.  Unlike the regression
    variant in this file, there is no validation step and no try/except
    around the Graphviz rendering here.
    """
    classifier = DecisionTreeClassifier(
        criterion, splitter, max_depth, min_samples_split, min_samples_leaf,
        min_weight_fraction_leaf, max_features, random_state, max_leaf_nodes,
        min_impurity_decrease, min_impurity_split, class_weight, presort)
    classifier.fit(table[feature_cols], table[label_col], sample_weight,
                   check_input, X_idx_sorted)

    # Render the fitted tree with Graphviz via pydotplus.
    from sklearn.externals.six import StringIO
    from sklearn.tree import export_graphviz
    import pydotplus
    dot_data = StringIO()
    export_graphviz(classifier,
                    out_file=dot_data,
                    feature_names=feature_cols,
                    class_names=table[label_col].astype('str').unique(),
                    filled=True,
                    rounded=True,
                    special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
    from brightics.common.report import png2MD
    fig_tree = png2MD(graph.create_png())

    # json
    model = _model_dict('decision_tree_classification_model')
    model['feature_cols'] = feature_cols
    model['label_col'] = label_col
    model['classes'] = classifier.classes_
    feature_importance = classifier.feature_importances_
    model['feature_importance'] = feature_importance
    model['max_features'] = classifier.max_features_
    model['n_classes'] = classifier.n_classes_
    model['n_features'] = classifier.n_features_
    model['n_outputs'] = classifier.n_outputs_
    model['tree'] = classifier.tree_
    get_param = classifier.get_params()
    model['parameters'] = get_param
    model['classifier'] = classifier

    # report: horizontal bar chart of importances, least important at the
    # bottom (argsort is ascending).
    indices = np.argsort(feature_importance)
    sorted_feature_cols = np.array(feature_cols)[indices]
    plt.title('Feature Importances')
    plt.barh(range(len(indices)), feature_importance[indices],
             color='b', align='center')
    for i, v in enumerate(feature_importance[indices]):
        plt.text(v, i, " {:.2f}".format(v), color='b', va='center',
                 fontweight='bold')
    plt.yticks(range(len(indices)), sorted_feature_cols)
    plt.xlabel('Relative Importance')
    plt.xlim(0, 1.1)
    plt.tight_layout()
    fig_feature_importances = plt2MD(plt)
    plt.clf()
    params = dict2MD(get_param)
    # NOTE(review): feature_importance_df is built but never used below --
    # confirm whether it was meant to go into the report.
    feature_importance_df = pd.DataFrame(data=feature_importance,
                                         index=feature_cols).T

    # Add tree plot
    rb = ReportBuilder()
    rb.addMD(
        strip_margin("""
    | ## Decision Tree Classification Train Result
    | ### Decision Tree
    | {fig_tree}
    |
    | ### Feature Importance
    | {fig_feature_importances}
    |
    | ### Parameters
    | {list_parameters}
    |
    """.format(fig_tree=fig_tree,
               fig_feature_importances=fig_feature_importances,
               list_parameters=params)))
    model['report'] = rb.get()

    return {'model': model}
# Bar chart of gradient-boosting feature importances, sorted descending.
sns.barplot("Features",
            "Importance",
            data=importances_gb.sort_values(by='Importance',
                                            ascending=False),
            color="darkorange",
            alpha=0.6,
            ax=axs[1, 1])
axs[1, 1].set_xlabel("Features")
axs[1, 1].set_title("Gradient Boosting Importances")
plt.tight_layout()
plt.savefig('Importances.png')
plt.show()

# Graphviz for tree visualization
dtree = DecisionTreeClassifier()  # no parameters
dtree = dtree.fit(features_train, target_train)
export.export_graphviz(dtree,
                       out_file='dtree_maxdepth.dot',
                       feature_names=[
                           'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
                           'Embarked', 'Has_Cabin', 'FamilySize', 'Title'
                       ],
                       class_names=True,
                       filled=True,
                       rounded=True,
                       leaves_parallel=True)

# Parameterized tree
pdtree = DecisionTreeClassifier(random_state=1,
                                max_depth=7,
                                min_samples_split=2,
                                criterion='entropy')
pdtree = pdtree.fit(features_train, target_train)
# NOTE(review): the call below is truncated in this fragment -- the
# remaining export_graphviz arguments continue outside this view.
export.export_graphviz(pdtree,
                       out_file='dtree_7depth.dot',
                       feature_names=[
                           'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
                           'Embarked', 'Has_Cabin', 'FamilySize', 'Title'
                       ],
sn.set(font_scale=1.4) # for label size sn.heatmap(df_cm, xticklabels=['Negativo', 'Neutro', 'Positivo'], yticklabels=['Negativo', 'Neutro', 'Positivo'], annot=True, annot_kws={"size": 14}, linewidths=.5, fmt="d") # font size plt.ylabel("Classificação real dos sentimentos") plt.xlabel("Classificação prevista dos sentimentos") plt.savefig('cmaddr.png') plt.show() #gerador de grafico para Árvore de Decisão export.export_graphviz( classificador, out_file='arvore_reviews3n.dot', feature_names=['Sentiment_Subjectivity', 'Sentiment_Polarity'], class_names=['Negative', 'Neutral', 'Positive'], filled=True, leaves_parallel=True) #foi feita uma primeira análise com o número de nodos livre e obteve-se precisao de 0.9986641731231632 #observou-se que houve um overfitting dos dados devido ao numero de nodos #decisao: diminuir o numero de nodos para k=3 #precisao = 0.998797755810847 #conclusao: #em uma arvore de decisao com 3 nós, houve um ganho de 0.001% de precisao, o que mostra que o atributo de polaridade #é um fator decisivo para se prever o sentimento
# In[12]: classificador.fit(previsores, classe) # In[15]: # Verificando a importância de cada atributo print(classificador.feature_importances_) # In[17]: # Criando arquivo para visualização gráfica da árvore de decisão criada export.export_graphviz( classificador, out_file='arvore.dot', feature_names=['historia', 'divida', 'garantias', 'renda'], class_names=['alto', 'moderado', 'baixo'], filled=True, leaves_parallel=True) # In[18]: resultados = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]]) # In[19]: resultados # In[22]: print(classificador.classes_)
    predictors)  # transforms and fits predictors, returning the updated df

from sklearn.model_selection import train_test_split

# Hold out 25% of the data for testing.
predictors_training, predictors_test, classLabel_training, classLabel_test = train_test_split(
    predictors, classLabel, test_size=0.25, random_state=0)

###################################### Algorithm ######################################

from sklearn.tree import DecisionTreeClassifier, export

classificator = DecisionTreeClassifier(criterion='entropy')
# NOTE(review): the tree is fitted on the FULL predictor set, not the
# training split created above -- the held-out test evaluation below is
# therefore optimistic; confirm whether this was intended.
classificator.fit(predictors, classLabel)
print(classificator.feature_importances_)

# Generate a tree export for later visualisation.
export.export_graphviz(
    classificator,
    out_file='creditdataTree.dot',
    feature_names=['income', 'age', 'loan'],
    class_names='classLabel',
    filled=True,
    leaves_parallel=True)

# Income, Age, Loan
pred1 = classificator.predict([[50000.00, 60.00, 10000.00]])  # returns class=0

# Now predict the whole test partition.
predictions = classificator.predict(predictors_test)

from sklearn.metrics import confusion_matrix, accuracy_score

precision = accuracy_score(
    classLabel_test,
    predictions)  # compares predictions with the original classLabel
# NOTE(review): the statement below is truncated in this fragment -- its
# arguments continue outside this view.
matrix = confusion_matrix(