def predict():
    """Prediction test page: classify article text submitted via POST.

    On GET, renders the form with no result; on POST, runs the Nlp
    classifier on the submitted article and shows the predicted class.
    """
    result = None
    if request.method == "POST":
        text = request.form['article']
        predicted = Nlp().predict([text])
        # predict() returns one (class, ...) entry per input document.
        result = {'article': text, 'class': predicted[0][0]}
    return render_template("predict.html", result=result)
class Database:
    """In-memory SQLite database built on demand from a CSV file, queried
    via natural-language questions translated to SQL by the Nlp/Agent stack.
    """

    def __init__(self, data_dir, schema_dir):
        """Store the data/schema locations and build the NLP helpers.

        data_dir:   directory passed through to Nlp and data_utils
        schema_dir: directory with the matching schema definitions
        """
        self.data_dir = data_dir
        self.schema_dir = schema_dir
        self.nlp = Nlp(data_dir, schema_dir)
        self.data_process = data_utils(data_dir, schema_dir)

    def query_db(self, question):
        """Answer `question`: pick the relevant CSV, load it into a fresh
        in-memory SQLite table, translate the question to SQL and return
        the fetched rows.
        """
        engine = create_engine('sqlite://', echo=False)
        csv = self.nlp.csv_select(question)
        data_frame = self.data_process.get_dataframe(csv)
        schema = self.data_process.get_schema_for_csv(csv)
        # BUG FIX: fill missing values BEFORE casting to str.  Previously
        # fillna(mean) ran after astype(str), by which point NaNs were the
        # literal string 'nan' and mean() over string columns was
        # meaningless.  numeric_only=True restricts the mean to numeric
        # columns, which is all fillna(Series) can fill anyway.
        data_frame = data_frame.fillna(data_frame.mean(numeric_only=True)).astype(str)
        # Build the SQLAlchemy dtype map from the schema, e.g. a declared
        # type "VARCHAR(50)" becomes types.VARCHAR(50).
        sql_schema = {}
        for col in schema['columns']:
            colname = col['name']
            coltype = column_types.get(col['type']).sql_type
            if '(' in coltype:
                coltype, arg = coltype.split('(')
                # Re-wrap the argument list as a literal tuple, e.g. "(50,)".
                arg = '(' + arg[:-1] + ',)'
                coltype = getattr(types, coltype)(*(ast.literal_eval(arg)))
            else:
                coltype = getattr(types, coltype)()
            sql_schema[colname] = coltype
        data_frame.to_sql(schema['name'].lower(), con=engine,
                          if_exists='replace', dtype=sql_schema)
        agent = Agent(self.data_dir, self.schema_dir)
        query = agent.get_query(question)
        return engine.execute(query).fetchall()
def get_query(self, question):
    """Translate a natural-language `question` into an SQL query string.

    Returns:
        The SQL query with placeholder tokens substituted via the value
        map produced by Nlp.get_sql_query, or None when no CSV matches
        the question or translation fails.
    """
    try:
        data_process = data_utils(self.data_dir, self.schema_dir)
        data_process.create_vocab()
        nlp = Nlp(self.data_dir, self.schema_dir)
        csv = nlp.csv_select(question)
        if csv is None:
            print("Sorry,didn't catch that")
            # BUG FIX: previously fell through and called
            # get_sql_query(None, question); bail out explicitly.
            return None
        question, valmap = nlp.get_sql_query(csv, question)
        sql_query = question
        # Substitute placeholder tokens with their literal values.
        for k, v in valmap.items():
            sql_query = sql_query.replace(k, v)
        return sql_query
    except Exception as e:
        # Best-effort: report and return None rather than propagate.
        print(e)
        return None
def index(label=None):
    """Article list page, optionally filtered by the ?label=N query arg."""
    db.build_db()
    limit = 0
    raw_label = request.args.get('label')
    if raw_label:
        label = int(raw_label)
        labels = (label,)
    else:
        # No filter requested: show every label class.
        label = 'all'
        labels = (0, 1, 2)
    articles = db.get_articles(labels, limit)
    # Predicted label for each article summary (index 3 of each row).
    predictions = Nlp().predict([article[3] for article in articles])
    predict_labels = [p[0] for p in predictions]
    return render_template("index.html",
                           articles=articles,
                           predict_labels=predict_labels,
                           label=label)
def transform(self, X, y=None):
    """Clean each document in X: strip diacritics and punctuation,
    normalize Arabic letter forms, and spell out digits as text.

    Returns a new list; X itself is not modified.
    """
    nlp = Nlp()
    cleaned = []
    for doc in X:
        doc = nlp.remove_diacritics(doc)
        doc = nlp.remove_punctuations(doc)
        doc = nlp.normalize_arabic(doc)
        doc = nlp.num_to_text(doc)
        cleaned.append(doc)
    return cleaned
def nlp_page(self):
    """Build the text-command window: a search entry plus a GO button
    that feeds the typed text into the Nlp launchers.
    """

    def destroy_win():
        # Close this window only.  NOTE(review): not attached to any
        # widget in this view — presumably wired up by code outside it.
        nlp_win.destroy()

    def launch():
        # Run the typed command through every Nlp entry point.
        text = search_entry.get()
        nlp_obj.other_launcher(text)
        nlp_obj.gui_launcher(text, self)
        nlp_obj.query(text)

    def switch_to_gui():
        # Swap to the GUI page and drop this window.
        # NOTE(review): also not attached to any widget in this view.
        self.gui_page()
        nlp_win.destroy()

    def go_to_menu():
        # Re-initialise the app and return to the main menu.
        # NOTE(review): also not attached to any widget in this view.
        self.__init__()
        self.main_page()
        nlp_win.destroy()

    nlp_obj = Nlp()
    nlp_win = Tk()
    nlp_win.title("VPA Command Window")
    nlp_win.config(padx=30, pady=30)
    info_label = Label(nlp_win,
                       text="Insert what you want in search box \n",
                       fg='green', bg='white')
    info_label.grid(row=0, column=0, columnspan=2)
    search_entry = Entry(nlp_win, width=50)
    search_entry.grid(row=1, column=0, columnspan=2)
    search_btn = Button(nlp_win, text="GO", padx=22, bg='brown',
                        fg='white', command=launch)
    search_btn.grid(row=2, column=0, columnspan=2)
def transform(self, X, y=None):
    """Tokenize X, stem or lemmatize each token list, remove stop words,
    and re-join every document back into a single space-separated string.
    """
    nlp = Nlp()
    tokens = nlp.tokenize_doc(X)
    if self.lemmatization:
        tokens = [nlp.lemmatization_text(t) for t in tokens]
    else:
        # 'stemmimg_text' is the Nlp API's (misspelled) method name.
        tokens = [nlp.stemmimg_text(t) for t in tokens]
    tokens = [nlp.remove_stop_words(t) for t in tokens]
    return [' '.join(str(part) for part in doc) for doc in tokens]
def __init__(self):
    # Core collaborators: NLP preprocessing, data access, and the
    # trained-model wrapper.
    self.nlp = Nlp()
    self.data = Data()
    self.tmodel = Tmodel()
class Controller:
    """Orchestrates intent-classifier training: text preprocessing,
    bag-of-words feature construction, one-hot labels, and model fitting.
    """

    def __init__(self):
        self.nlp = Nlp()
        self.data = Data()
        self.tmodel = Tmodel()

    def preprocessingData(self, data):
        """Normalize, tokenize and stem every pattern in `data`.

        Returns:
            dict with "df" (id/label/pattern_words dataframe, one row per
            pattern) and "unique_word_list" (sorted stemmed vocabulary).
        """
        label_id = []
        label = []
        pattern_words = []
        all_word_list = []  # every stemmed word seen in any pattern
        for row in range(data.shape[0]):
            for pattern in data["patterns"][row]:
                normalized_pattern = self.nlp.normalizeSentence(pattern)
                words = self.nlp.tokenizeSentence(normalized_pattern)
                words = self.nlp.stemWords(words)
                all_word_list.extend(words)
                label_id.append(data["id"][row])
                label.append(data["label"][row])
                pattern_words.append(' '.join(words))
        return {
            "df": self.data.createDataFrame(label_id, label, pattern_words),
            "unique_word_list": sorted(set(all_word_list))
        }

    def create_x(self, processedObj: dict):
        """Bag-of-words matrix: one row per pattern, one column per word."""
        x = []
        for row in range(processedObj["df"].shape[0]):
            pattern = processedObj["df"]["pattern_words"][row]
            # NOTE(review): `word in pattern` is substring membership on the
            # joined string ("cat" matches "category"); kept as-is because
            # changing it would alter the trained feature space.
            bow = [1 if word in pattern else 0
                   for word in processedObj["unique_word_list"]]
            x.append(bow)
        return np.array(x)

    def create_y(self, data, processedObj: dict):
        """One-hot label matrix aligned with the processed dataframe rows."""
        # Map each distinct label to a dense index 0..k-1 in order of first
        # appearance.  BUG FIX: previously the raw row index of the first
        # occurrence was stored, which can exceed num_classes and make
        # to_categorical fail.
        labelDict = {}
        for row in range(data.shape[0]):
            lbl = data["label"][row]
            if lbl not in labelDict:
                labelDict[lbl] = len(labelDict)
        labels = [labelDict[processedObj["df"]["label"][row]]
                  for row in range(processedObj["df"].shape[0])]
        return np.array(to_categorical(labels, num_classes=len(labelDict)))

    def model(self, x, y):
        """Train a 3-layer dense network (128, 64, num-classes neurons)
        and save it to model.h5.
        """
        model = Sequential()
        model.add(Dense(128, input_shape=(x.shape[1],), activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(y.shape[1], activation='softmax'))
        # SGD = stochastic gradient descent with momentum.
        sgd = SGD(lr=1e-2, decay=1e-2, momentum=0.9, nesterov=True)
        model.compile(optimizer=sgd,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        print(model.summary())
        model.fit(x, y, epochs=150, batch_size=5, verbose=1)
        # BUG FIX: Model.save's second positional parameter is `overwrite`;
        # the training history object was being passed there by mistake.
        model.save('model.h5')

    def run(self):
        """End-to-end training entry point."""
        data = self.data.provideData()
        print("Processing Data")
        processedObj = self.preprocessingData(data)
        print("Generating Trainingsset")
        x = self.create_x(processedObj)
        y = self.create_y(data, processedObj)
        print("Train Model")
        self.model(x, y)
        print("model created")
def fit():
    """Train the classifier on all stored articles, then redirect back
    to the article index page.
    """
    nlp = Nlp()
    rows = db.get_articles()
    # Each DB row is positional: (id, entry_id, link, summary, label).
    articles = []
    for row in rows:
        articles.append({
            'id': row[0],
            'entry_id': row[1],
            'link': row[2],
            'summary': row[3],
            'label': row[4],
        })
    nlp.build_dictionary(articles)
    dictionary = nlp.get_dictionary()
    nlp.build_articles(dictionary, articles)
    nlp.build_labels(articles)
    labels = nlp.get_labels()
    nlp.fit(dictionary, articles, labels)
    return redirect(url_for('index'))
def __init__(self, gui=None):
    """Set up speech-recognition state.

    gui: optional GUI object kept for the Nlp launchers; may be None.
    """
    # NOTE(review): never assigned a real value here — presumably
    # configured elsewhere; confirm before relying on it.
    self.prompt_limit = None
    self.recognizer = sr.Recognizer()
    self.microphone = sr.Microphone()
    self.nlp = Nlp()
    self.gui = gui
class VoiceRecognition:
    """Microphone front-end: transcribes speech with Google's recognizer
    and forwards each transcription to the Nlp launchers.
    """

    def __init__(self, gui=None):
        # gui: optional GUI object for the (currently disabled) gui_launcher.
        self.prompt_limit = None
        self.recognizer = sr.Recognizer()
        self.microphone = sr.Microphone()
        self.nlp = Nlp()
        self.gui = gui

    def run(self):
        """Greet the user, then loop forever transcribing and dispatching."""
        self.wish_me()
        while True:
            response = self.recognize_speech_from_mic()
            print(response)
            if response['transcription'] is None:
                continue
            #self.nlp.gui_launcher(response['transcription'], self.gui)
            self.nlp.other_launcher(response['transcription'])
            self.nlp.query(response['transcription'])

    def wish_me(self):
        """Speak a greeting appropriate to the current time of day."""
        hour = int(datetime.datetime.now().hour)
        if hour >= 0 and hour < 12:
            text = "Good Morning! "
        elif hour >= 12 and hour < 18:
            # BUG FIX: condition was `hour>=21 and hour<18`, which is never
            # true, so the afternoon greeting was unreachable.
            text = "Good Afternoon! "
        else:
            text = "Good Evening! "
        text += "sir, how may I help you?"
        self.speak(text)

    def speak(self, text):
        """Synthesize `text` with gTTS and play the resulting mp3."""
        Message = text
        speech = gTTS(text=Message)
        speech.save('./data/sound.mp3')
        playsound('./data/sound.mp3')

    def recognize_speech_from_mic(self):
        """Transcribe speech recorded from `self.microphone`.

        Returns a dictionary with three keys:
        "success":       False only if the recognition API was unreachable
        "error":         None if no error occurred, otherwise a message
                         (API unreachable or speech unrecognizable)
        "transcription": None if speech could not be transcribed,
                         otherwise the transcribed text
        """
        # Check that recognizer and microphone are the appropriate types.
        if not isinstance(self.recognizer, sr.Recognizer):
            raise TypeError("`self.recognizer` must be `Recognizer` instance")
        if not isinstance(self.microphone, sr.Microphone):
            raise TypeError("`self.microphone` must be `Microphone` instance")
        # Adjust sensitivity to ambient noise, then record one utterance.
        with self.microphone as source:
            self.recognizer.adjust_for_ambient_noise(source)
            audio = self.recognizer.listen(source)
        response = {
            "success": True,
            "error": None,
            "transcription": None
        }
        # Recognize the recording; map API failures onto the response dict.
        try:
            response["transcription"] = self.recognizer.recognize_google(
                audio, language="en-in")
        except sr.RequestError:
            # API was unreachable or unresponsive.
            response["success"] = False
            response["error"] = "API unavailable"
        except sr.UnknownValueError:
            # Speech was unintelligible.
            response["error"] = "Unable to recognize speech"
        return response
def __init__(self, data_dir, schema_dir):
    """Store the data/schema locations and build the NLP helpers.

    data_dir:   directory passed through to Nlp and data_utils
    schema_dir: directory with the matching schema definitions
    """
    self.data_dir = data_dir
    self.schema_dir = schema_dir
    self.nlp = Nlp(data_dir, schema_dir)
    self.data_process = data_utils(data_dir, schema_dir)