#def get_ from sklearn.tree import DecisionTreeClassifier train = corpus.load_corpus(all=True) statistic = analytics.load_analytics(train) heighest_probabilty = {} for i in statistic: heighest_probabilty[i] = max(statistic[i].items(),key=lambda x:x[1])[0] X_train_raw, Y_train_raw = extract_feature(data=train) #Global label_encoder to encode X values global_label_encoder,global_hot_encoder = set_encoder(Y_train_raw) print("Training Global Classifer ....") X_train,Y_train = encode_features(X_train_raw,Y_train_raw,global_label_encoder,global_hot_encoder) global_clf = DecisionTreeClassifier() global_clf.fit(X_train,Y_train) print("Completed") # print(train) # Identify the ambiguity classes amb_class = {} for i in train: for x,y in enumerate(i): #If the word only has one tagging, we don't need a classifier if len(statistic[y[0]]) == 1: pass #If there is an ambiguity, we need a decission tree classifier else:
# Train a DecisionTreeClassifier on features extracted from the project's
# corpus and print its accuracy on a held-out split.
# NOTE(review): the corpus appears to be a tagged corpus (labels come from
# extract_feature's second return value) — confirm against corpus.load_corpus.
from features import extract_feature, set_encoder,encode_features
from corpus import load_corpus
from sklearn.tree import DecisionTreeClassifier

# Raw (features, labels) pairs from the default corpus split.
X_train_raw, Y_train_raw = extract_feature(data=load_corpus())
# Fit the encoders once on the training data; the SAME encoder objects are
# reused below for the test split so train and test share one encoding.
label_encoder,hot_encoder = set_encoder(Y_train_raw)
X_train,Y_train = encode_features(X_train_raw,Y_train_raw,label_encoder,hot_encoder)

# Fit the tree on the encoded training data.
clf = DecisionTreeClassifier()
clf.fit(X_train,Y_train)

# Evaluate on a second split; `last=True` presumably selects the held-out
# tail of the corpus — verify against corpus.load_corpus.
X_test_raw,Y_test_raw = extract_feature(load_corpus(last=True))
X_test,Y_test = encode_features(X_test_raw,Y_test_raw,label_encoder,hot_encoder)
# DecisionTreeClassifier.score returns mean accuracy on the given data.
print(clf.score(X_test,Y_test))