def classify_sentence(clf, user_input):
    """Classify a single sentence with a pre-trained classifier.

    Parameters
    ----------
    clf : fitted scikit-learn classifier
        Must expose ``predict``.
    user_input : str
        The raw sentence to classify.

    Returns
    -------
    str
        The predicted class label, stripped of surrounding whitespace.
    """
    import features
    import pandas as pd

    # Column order must match the order the model was trained with.
    keys = ["id", "wordCount", "stemmedCount", "stemmedEndNN", "CD", "NN",
            "NNP", "NNPS", "NNS", "PRP", "VBG", "VBZ", "startTuple0",
            "endTuple0", "endTuple1", "endTuple2", "verbBeforeNoun", "qMark",
            "qVerbCombo", "qTripleScore", "sTripleScore", "class"]

    # The ID ('1') and class ('X') are placeholders required by features_dict.
    feature_dict = features.features_dict('1', user_input, 'X')
    values = [feature_dict[key] for key in keys]
    s = pd.Series(values)
    # Strip the FIRST item (the ID) and the LAST item (the class label used
    # only in supervised-learning mode). The original comment claimed only
    # the last item was dropped, which was wrong: s[1:len(s)-1] drops both.
    feature_vector = s[1:len(s) - 1]
    prediction = clf.predict([feature_vector])
    return prediction[0].strip()
def classify_sentence(clf, user_input):
    """Classify a sentence and print a confidence-gated class announcement.

    Prints "CHAT!", "QUES!", or "STAT!" when the model's probability for the
    predicted class is at least 0.75, otherwise prints "You tell me?".
    Returns None. Relies on the module-level ``features`` helper and ``pd``.

    Parameters
    ----------
    clf : fitted scikit-learn classifier exposing ``predict`` and
        ``predict_proba``.
    user_input : str, the raw sentence to classify.
    """
    # Column order must match the order the model was trained with.
    keys = ["id", "wordCount", "stemmedCount", "stemmedEndNN", "CD", "NN",
            "NNP", "NNPS", "NNS", "PRP", "VBG", "VBZ", "startTuple0",
            "endTuple0", "endTuple1", "endTuple2", "verbBeforeNoun", "qMark",
            "qVerbCombo", "qTripleScore", "sTripleScore", "class"]

    myFeatures = features.features_dict('1', user_input, 'X')
    values = [myFeatures[key] for key in keys]
    s = pd.Series(values)
    # Strip the leading ID and the trailing class placeholder.
    feature_vector = s[1:len(s) - 1]

    # Compute the label and class probabilities once instead of re-calling
    # predict[0].strip() in every branch as the original did.
    label = clf.predict([feature_vector])[0].strip()
    probabilities = clf.predict_proba([feature_vector])[0]

    # Map each label to (probability column index, message). Any label other
    # than "C" or "Q" falls through to the "STAT!" case, as in the original.
    # NOTE(review): column 1 is never consulted -- presumably a fourth class
    # occupies it; verify the indices against clf.classes_.
    outcomes = {"C": (0, "CHAT!"), "Q": (2, "QUES!")}
    index, message = outcomes.get(label, (3, "STAT!"))

    # Only announce the class when the model is at least 75% confident.
    if probabilities[index] >= 0.75:
        print(message)
    else:
        print("You tell me?")
def sent_class(sentence):
    """Return the classification code predicted for *sentence*.

    Relies on the module-level ``features`` helper and ``loaded_model``.

    Parameters
    ----------
    sentence : str, the sentence to classify.

    Returns
    -------
    str, the predicted class code, stripped of whitespace.
    """
    # features needs an ID passing in at moment - maybe redundant?
    # Renamed from ``id`` to avoid shadowing the builtin.
    sent_id = 1
    f = features.features_dict(str(sent_id), sentence)
    fseries = features.features_series(f)
    # All but the first and last item (strip the ID and the null class off).
    fseries = fseries[1:len(fseries) - 1]
    # Get a classification prediction from the model, based on the features.
    sentence_class = loaded_model.predict([fseries])[0].strip()
    return sentence_class
def sentenceForestClass(sentence):
    """Classify *sentence* using the pickled Random Forest at MODEL_LOC.

    Parameters
    ----------
    sentence : str, the sentence to classify.

    Returns
    -------
    str, the predicted class code, stripped of whitespace.
    """
    # SECURITY NOTE: pickle.load executes arbitrary code from the file --
    # only load models from a trusted location.
    with open(MODEL_LOC, 'rb') as f:
        rf = pickle.load(f, encoding='latin1')
    # features needs an ID passing in at moment - maybe redundant?
    # Renamed from ``id`` to avoid shadowing the builtin.
    sent_id = hashtext(sentence)
    fseries = features.features_series(features.features_dict(sent_id, sentence))
    # All but the first and last item (strip the ID and the null class off).
    fseries = fseries[1:len(fseries) - 1]
    # Get a classification prediction from the model, based on the features.
    sentenceClass = rf.predict([fseries])[0].strip()
    return sentenceClass
def sentence_rf_class(sentence):
    """
    Pass in a sentence and pass back a classification code.

    Uses a pre-built Random Forest model to determine the classification
    based on features extracted from the sentence.

    Parameters
    ----------
    sentence : str, the sentence to classify.

    Returns
    -------
    str, the predicted class code, stripped of whitespace.
    """
    # Load the pre-built Random Forest model.
    # SECURITY NOTE: pickle.load executes arbitrary code from the file --
    # only load models from a trusted location.
    with open(RF_MODEL_LOCATION, 'rb') as f:
        rf = pickle.load(f)
    # features needs an ID passing in at moment - maybe redundant?
    # Renamed from ``id`` to avoid shadowing the builtin.
    sent_id = hashtext(sentence)
    fseries = features.features_series(features.features_dict(sent_id, sentence))
    # All but the first and last item (strip the ID and the null class off).
    fseries = fseries[1:len(fseries) - 1]
    # Get a classification prediction from the model, based on the features.
    sentence_class = rf.predict([fseries])[0].strip()
    return sentence_class
# Read each (sentence, class) row from the open CSV handle ``fin``, extract
# its feature dictionary, and print it as a comma-separated row. The header
# row is printed (and written to ``fout``) once, on the first iteration.
# Depends on ``fin``, ``fout``, ``keys``, ``csv``, ``hashlib`` and the
# ``features`` module being defined elsewhere in this file.
reader = csv.reader(fin)
loopCount = 0
next(reader)  #Assume we have a header
for line in reader:
    sentence = line[0]
    c = line[1]  #class-label
    # Derive a stable 16-hex-char ID from the sentence text.
    id = hashlib.md5( str(sentence).encode('utf-8')).hexdigest()[:16]  # generate a unique ID
    output = ""
    header = ""
    #get header and string output
    #output, header = features.get_string(id,sentence,c)
    f = features.features_dict(id, sentence, c)
    # Build the header and value strings in the fixed ``keys`` order so the
    # columns line up across rows.
    for key in keys:
        value = f[key]
        header = header + ", " + key
        output = output + ", " + str(value)
    if loopCount == 0:  # only extract and print header for first dict item
        header = header[1:]  #strip the first "," off
        print(header)
        fout.writelines(header + '\n')
    output = output[1:]  #strip the first "," off
    loopCount = loopCount + 1
    # NOTE(review): ``output`` is only printed here, never written to ``fout``
    # within this view -- presumably the write happens later; confirm.
    print(output)
#sentence = "Can a dog see in colour?" # #sentence = features.strip_sentence(sentence) #print(sentence) #pos = features.get_pos(sentence) #triples = features.get_triples(pos) #print(triples) sentences = [ "Can a dog see in colour?", "Hey, How's it going?", "Oracle 12.2 will be released for on-premises users on 15 March 2017", "When will Oracle 12 be released" ] id = 1 for s in sentences: features_dict = features.features_dict(str(id), s) features_string, header = features.get_string(str(id), s) # print(features_dict) # print(features_string) id += 1 from sklearn.ensemble import RandomForestClassifier FNAME = 'C://Users/Abhay/Downloads/NLPBot-master/NLPBot-master/analysis/featuresDump.csv' df = pd.read_csv(filepath_or_buffer=FNAME, ) #print(str(len(df)), "rows loaded") df.columns = df.columns[:].str.strip() df['class'] = df['class'].map(lambda x: x.strip())
    # (continuation of a ``keys = [`` list opened before this view)
    "id", "wordCount", "stemmedCount", "stemmedEndNN", "CD", "NN", "NNP",
    "NNPS", "NNS", "PRP", "VBG", "VBZ", "startTuple0", "endTuple0",
    "endTuple1", "endTuple2", "verbBeforeNoun", "qMark", "qVerbCombo",
    "qTripleScore", "sTripleScore", "class"
]
# Read each (sentence, class) row from ``reader``, extract its features in
# ``keys`` order, and collect them into a DataFrame for batch prediction.
rows = []
next(reader)  #Assume we have a header
for line in reader:
    sentence = line[0]
    c = line[1]  #class-label
    # Derive a stable 16-hex-char ID from the sentence text.
    id = hashlib.md5( str(sentence).encode('utf-8')).hexdigest()[:16]  # generate a unique ID
    f = features.features_dict(id, sentence, c)
    row = []
    for key in keys:
        value = f[key]
        row.append(value)
    rows.append(row)
faq = pd.DataFrame(rows, columns=keys)
fin.close()
# NOTE(review): ``width`` is not defined within this view -- presumably set
# earlier in the file to the feature-column count; confirm.
featureNames = faq.columns[1:width - 1]  #remove the first ID col and last col=classifier
faqPreds = clf.predict(faq[featureNames])
# Assemble the prediction output table (dict literal continues past this view).
predout = pd.DataFrame({
    'id': faq['id'],
# Exploration script: load sample sentences from CSV and walk through the
# NLP feature-extraction pipeline provided by the local ``features`` module.
import pandas as pd
import sys
import features

# NOTE(review): hard-coded absolute Windows paths -- consider making these
# configurable.
CODE_LOC = 'C:\\Users\\Vishakha Lall\\Projects\\Python\\TestNLTK'
DATA_LOC = 'C:\\Users\\Vishakha Lall\\Projects\\Python\\TestNLTK\\sentences.csv'

sentences = pd.read_csv(filepath_or_buffer=DATA_LOC)
print(sentences.head(10))

# Step through the pipeline for a single sentence first.
sentence = "Can a dog see in colour?"
sentence = features.strip_sentence(sentence)
print(sentence)
pos = features.get_pos(sentence)
triples = features.get_triples(pos)
print(triples)

# Then extract the full feature set for several sample sentences, giving
# each a sequential numeric ID.
sentences = [
    "Can a dog see in colour?",
    "Hey, How's it going?",
    "Oracle 12.2 will be released for on-premises users on 15 March 2017",
    "When will Oracle 12 be released"
]
id = 1  # NOTE(review): shadows the builtin ``id``; kept -- later code may read it
for s in sentences:
    features_dict = features.features_dict(str(id), s)
    features_string, header = features.get_string(str(id), s)
    print(features_dict)
    #print(features_string)
    id += 1
print(sentence)
pos = features.get_pos(sentence)
triples = features.get_triples(pos)
print(triples)

# Dictionary of features for a batch of sample sentences.
# NOTE(review): ``sentence`` here is a LIST despite the singular name; kept
# as-is because later code may reference it.
sentence = [
    "Sorry, I don't know about the weather.",
    "That is a tricky question to answer.",
    "What does OCM stand for",
    "MAX is a Mobile Application Accelerator",
    "Can a dog see in colour?",
    "how are you"
]
id = 1  # NOTE(review): shadows the builtin ``id``; kept for compatibility
for s in sentence:
    features_dict = features.features_dict(str(id), s)
    # FIX: pass the running id (the original passed the constant str(1)),
    # matching the sibling scripts that call get_string(str(id), s).
    features_string, header = features.get_string(str(id), s)
    print(features_dict)
    id += 1

# Building a machine learning model
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

df = pd.read_csv("featuresDump.csv")
print(str(len(df)), "rows loaded")

# Strip any leading space from col names
df.columns = df.columns[:].str.strip()