import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from utils.Preprocess import Preprocess  # project-local frame preprocessor; import path assumed


class Mario():
    """Frame-skipping wrapper around SuperMarioBros with preprocessed, stacked observations."""

    def __init__(self, img_size=32, stacks=4, skips=4, return_seq=False):
        env = gym_super_mario_bros.make('SuperMarioBros-v2')
        self.env = JoypadSpace(env, SIMPLE_MOVEMENT)
        self.preprocess = Preprocess(img_size, stacks, return_seq)
        self.skips = skips
        self.action_space = self.env.action_space
        self.observation_space = (img_size, img_size, stacks)

    def reset(self):
        self.preprocess.reset()
        s = self.env.reset()
        s = self.preprocess(s)
        return s

    def step(self, a):
        # Repeat the same action for `skips` frames, accumulating the reward
        total_r = 0
        for i in range(self.skips):
            self.env.render()
            n_s, r, done, info = self.env.step(a)
            n_s = self.preprocess(n_s)
            total_r += r
            if done:
                break
        return n_s, total_r, done, info
import gym
from utils.Preprocess import Preprocess  # project-local frame preprocessor; import path assumed


class Pong():
    """Frame-skipping Pong wrapper with a reduced 3-action space (NOOP / up / down)."""

    def __init__(self, img_size=32, stacks=4, skips=4, return_seq=False):
        self.env = gym.make('Pong-v0')
        self.preprocess = Preprocess(img_size, stacks, return_seq)
        self.skips = skips
        # Map the reduced action ids onto Atari actions: 0=NOOP, 2/3 move the paddle
        self.pong_action = {0: 0, 1: 2, 2: 3}
        self.action_space = self.env.action_space
        self.action_space.n = 3  # expose only the three mapped actions
        self.observation_space = (img_size, img_size, stacks)

    def reset(self):
        self.preprocess.reset()
        s = self.env.reset()
        s = self.preprocess(s)
        return s

    def step(self, a):
        # Repeat the same (remapped) action for `skips` frames, accumulating the reward
        total_r = 0
        for i in range(self.skips):
            self.env.render()
            n_s, r, done, info = self.env.step(self.pong_action[a])
            n_s = self.preprocess(n_s)
            total_r += r
            if done:
                break
        return n_s, total_r, done, info
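# A minimal usage sketch (not from these files) for the two wrappers above: run one
# random-action episode. Assumes the Preprocess frame utility and the gym / nes-py
# packages are available.
if __name__ == '__main__':
    env = Pong()                       # or Mario()
    s = env.reset()
    done = False
    total = 0
    while not done:
        a = env.action_space.sample()  # random action from the (reduced) action space
        s, r, done, info = env.step(a)
        total += r
    print('episode return:', total)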
import os
import sys

currentdir = os.path.dirname(os.path.realpath(__file__))
parentdir = os.path.dirname(currentdir)
#grandparentdir = os.path.dirname(parentdir)
sys.path.append(parentdir)
print(parentdir)

from utils.Preprocess import Preprocess

sent = "i love potato, i have a party. so i need your help. i'm happy."

p = Preprocess()
pos = p.pos(sent)

# Keywords with POS tags
ret = p.get_keywords(pos, without_tag=False)
print(ret)

# Keywords without POS tags
ret = p.get_keywords(pos, without_tag=True)
print(ret)
import os
import sys

currentdir = os.path.dirname(os.path.realpath(__file__))
parentdir = os.path.dirname(currentdir)
grandparentdir = os.path.dirname(parentdir)  # restored: the fragment used this without defining it
sys.path.append(grandparentdir)

from utils.Preprocess import Preprocess
from tensorflow.keras import preprocessing
import pickle


# Read the corpus data, skipping the header row
def read_corpus_data(filename):
    with open(filename, 'r') as f:
        data = [line.split('\t') for line in f.read().splitlines()]
        data = data[1:]
    return data


corpus_data = read_corpus_data('./corpus.txt')

# Collect every morpheme in the corpus as a dictionary candidate
p = Preprocess()
dict = []
for c in corpus_data:
    pos = p.pos(c[1])
    for k in pos:
        dict.append(k[0])

# Build the word2index mapping used as the dictionary;
# the first index is reserved for OOV
tokenizer = preprocessing.text.Tokenizer(oov_token='OOV')
tokenizer.fit_on_texts(dict)
word_index = tokenizer.word_index

# Save the dictionary file (the fragment breaks off inside this try block;
# the dump/close steps below are the assumed completion)
f = open("chatbot_dict.bin", "wb")
try:
    pickle.dump(word_index, f)
except Exception as e:
    print(e)
finally:
    f.close()
import pandas as pd
import tensorflow as tf
from tensorflow.keras import preprocessing
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Dropout, Conv1D, GlobalMaxPool1D, concatenate
from tensorflow.keras.utils import plot_model

# Load the training data
train_file = "drive/MyDrive/deep-chatbot/models/intent/total_train_data.csv"
data = pd.read_csv(train_file, delimiter=',')
queries = data['query'].tolist()
intents = data['intent'].tolist()

from utils.Preprocess import Preprocess
p = Preprocess(word2index_dic='drive/MyDrive/deep-chatbot/train_tools/dict/chatbot_dict.bin')

# Build the word index sequences
sequences = []
for sentence in queries:
    pos = p.pos(str(sentence))
    keywords = p.get_keywords(pos, without_tag=True)
    seq = p.get_wordidx_sequence(keywords)
    sequences.append(seq)

# Pad the word index sequences to a fixed length
from config.GlobalParams import MAX_SEQ_LEN
padded_seqs = preprocessing.sequence.pad_sequences(sequences, maxlen=MAX_SEQ_LEN, padding='post')
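# A minimal sketch (not taken from this file) of the kind of multi-kernel text CNN
# the imports above suggest; VOCAB_SIZE, EMB_SIZE, the dropout rate, and the number
# of intent classes are assumptions for illustration, not values from this repo.
VOCAB_SIZE = 20000   # assumed: size of the word2index dictionary + 1
EMB_SIZE = 128       # assumed embedding width
NUM_CLASSES = 5      # assumed number of intent labels

input_layer = Input(shape=(MAX_SEQ_LEN,))
embedding_layer = Embedding(VOCAB_SIZE, EMB_SIZE, input_length=MAX_SEQ_LEN)(input_layer)
dropout_emb = Dropout(rate=0.5)(embedding_layer)

# One Conv1D branch per kernel size, each reduced by global max pooling
conv_pools = []
for kernel_size in [3, 4, 5]:
    conv = Conv1D(filters=128, kernel_size=kernel_size, padding='valid',
                  activation=tf.nn.relu)(dropout_emb)
    conv_pools.append(GlobalMaxPool1D()(conv))

concat = concatenate(conv_pools)
hidden = Dense(128, activation=tf.nn.relu)(concat)
dropout_hidden = Dropout(rate=0.5)(hidden)
predictions = Dense(NUM_CLASSES, activation=tf.nn.softmax)(dropout_hidden)

model = Model(inputs=input_layer, outputs=predictions)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()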
from utils.Preprocess import Preprocess

sent = input()

# Create the preprocessing object
p = Preprocess(userdic='../utils/user_dic.tsv')

# Run the morphological analyzer
pos = p.pos(sent)

# Print keywords with their POS tags
ret = p.get_keywords(pos, without_tag=False)
print(ret)

# Print keywords without POS tags
ret = p.get_keywords(pos, without_tag=True)
print(ret)
# Import the required modules
import pandas as pd
import tensorflow as tf
from tensorflow.keras import preprocessing
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Dropout, Conv1D, GlobalMaxPool1D, concatenate

# Load the training data
train_file = "total_train_data.csv"
data = pd.read_csv(train_file, delimiter=',', encoding='cp949')
queries = data['query'].tolist()
intents = data['intent'].tolist()

from utils.Preprocess import Preprocess
p = Preprocess(word2index_dic='../../train_tools/dict/chatbot_dict.bin',
               userdic='../../utils/user_dic.tsv')

# Build the word index sequences
sequences = []
for sentence in queries:
    pos = p.pos(sentence)
    keywords = p.get_keywords(pos, without_tag=True)
    seq = p.get_wordidx_sequence(keywords)
    sequences.append(seq)

# Pad the word index sequences to a fixed length
padded_seqs = preprocessing.sequence.pad_sequences(sequences, maxlen=15, padding='post')
from utils.Preprocess import Preprocess  # restored: the fragment uses Preprocess without importing it
from tensorflow.keras import preprocessing
import pickle


# Read the corpus data
def read_corpus_data(filename):
    with open(filename, 'r') as f:
        data = [line.split('\t') for line in f.read().splitlines()]
    return data


# Load the corpus
corpus_data = read_corpus_data('./corpus.txt')

# Extract only the keywords from the corpus to build the dictionary list
p = Preprocess(word2index_dic='chatbot_dict.bin', userdic='../../utils/user_dic.tsv')
dict = []
for c in corpus_data:
    pos = p.pos(c[1])
    for k in pos:
        dict.append(k[0])
# keywords = p.get_keywords(pos, without_tag=True)
# for k in keywords:
#     dict.append(k)

# Build the word2index mapping used as the dictionary;
# the first index is reserved for OOV
tokenizer = preprocessing.text.Tokenizer(oov_token='OOV')
tokenizer.fit_on_texts(dict)
word_index = tokenizer.word_index
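# This script stops before persisting word_index. A minimal sketch of the save step,
# following the pickle pattern used by the other dictionary-building script in this
# repo (the output filename is an assumption):
with open("chatbot_dict.bin", "wb") as f:
    pickle.dump(word_index, f)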
from utils.Preprocess import Preprocess  # restored: the fragment uses Preprocess without importing it


# Parse the NER training file into sentences of (index, word, POS, BIO-tag) tuples
def read_file(file_name):
    sents = []
    with open(file_name, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for idx, l in enumerate(lines):
            if l[0] == ';' and lines[idx + 1][0] == '$':
                this_sent = []
            elif l[0] == '$' and lines[idx - 1][0] == ';':
                continue
            elif l[0] == '\n':
                sents.append(this_sent)
            else:
                this_sent.append(tuple(l.split()))
    return sents


p = Preprocess(word2index_dic='drive/MyDrive/deep-chatbot/train_tools/dict/chatbot_dict.bin')

# Load the training corpus
corpus = read_file('drive/MyDrive/deep-chatbot/models/ner/ner_train.txt')

# Build the training set from only the words and BIO tags in the corpus
sentences, tags = [], []
for t in corpus:
    tagged_sentence = []
    sentence, bio_tag = [], []
    for w in t:
        tagged_sentence.append((w[1], w[3]))
        sentence.append(w[1])
        bio_tag.append(w[3])
    sentences.append(sentence)
    tags.append(bio_tag)  # assumed completion: the fragment ends before collecting the tags
import sys
sys.path.append('/content/drive/MyDrive/deep-chatbot/')

import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import preprocessing

intent_labels = {0: "인사", 1: "질문"}  # 0: greeting, 1: question

# Load the intent classification model
model = load_model('drive/MyDrive/deep-chatbot/models/intent/intent_model.h5')

query = "데이터베이스"

from utils.Preprocess import Preprocess
p = Preprocess(word2index_dic='drive/MyDrive/deep-chatbot/train_tools/dict/chatbot_dict.bin',
               userdic='drive/MyDrive/deep-chatbot/utils/user_dic.tsv')

pos = p.pos(query)
keywords = p.get_keywords(pos, without_tag=True)
seq = p.get_wordidx_sequence(keywords)
sequences = [seq]

# Pad to the fixed sequence length
from config.GlobalParams import MAX_SEQ_LEN
padded_seqs = preprocessing.sequence.pad_sequences(sequences, maxlen=MAX_SEQ_LEN, padding='post')

predict = model.predict(padded_seqs)
predict_class = tf.math.argmax(predict, axis=1)

print(query)
print("의도 예측 점수 : ", predict)                          # intent prediction scores
print("의도 예측 클래스 : ", predict_class.numpy())          # predicted class
print("의도 : ", intent_labels[predict_class.numpy()[0]])   # intent label
from utils.Preprocess import Preprocess

sent = "김포시에 있는 고려병원 위치 알려줘"

p = Preprocess(userdic='../utils/user_dic.txt')  # user dictionary

# Morpheme-level analysis; POS filtering has not been applied yet
pos = p.pos(sent)

# print(pos)
# ret = p.get_keywords(pos, without_tag=False)
# print(ret)

# Extract the words only, without POS tags
ret = p.get_keywords(pos, without_tag=True)
print(ret)
from models.intent.IntentModel import IntentModel
from utils.Preprocess import Preprocess

p = Preprocess(word2index_dic='train_tools/dict/chatbot_dict.bin',
               userdic='../Tokenizing/user_dic.txt')

# note: 'proprocess' is the keyword argument name as defined by IntentModel
intent = IntentModel(model_name='models/intent/intent_model.h5', proprocess=p)

query = "안녕하세요."
intent_predict = intent.predict_class(query)
print(intent_predict)
import sys
sys.path.append('/content/drive/MyDrive/deep-chatbot/')

import pickle
from utils.Preprocess import Preprocess

# Load the word dictionary
f = open("drive/MyDrive/deep-chatbot/train_tools/dict/chatbot_dict.bin", "rb")
word_index = pickle.load(f)
f.close()

sent = "데이터베이스가 뭔지 알려줘"

# Create the preprocessing object
p = Preprocess(userdic='drive/MyDrive/deep-chatbot/utils/user_dic.tsv')

# Run the morphological analyzer
pos = p.pos(sent)

# Extract keywords without POS tags
keywords = p.get_keywords(pos, without_tag=True)
for word in keywords:
    try:
        print(word, word_index[word])
    except KeyError:
        # Words missing from the dictionary are mapped to OOV
        print(word, word_index['OOV'])
from utils.Preprocess import Preprocess
from tensorflow.keras import preprocessing

#sent = "내일 오전 10시에 짬뽕 주문하고 싶어ㅋㅋ"
sent = "내일 오전 10시에 탕수육 주문하고 싶어"

p = Preprocess(word2index_dic='../train_tools/dict/chatbot_dict.bin',
               userdic='../utils/user_dic.tsv')

pos = p.pos(sent)
keywords = p.get_keywords(pos, without_tag=False)
print(keywords)

# w2i = p.get_wordidx_sequence(keywords)
# sequences = [w2i]
#
# MAX_SEQ_LEN = 15  # fixed sequence length for the embedding input
# padded_seqs = preprocessing.sequence.pad_sequences(sequences, maxlen=MAX_SEQ_LEN, padding='post')
#
# print(keywords)
# print(sequences)
# print(padded_seqs)
from utils.Preprocess import Preprocess
from models.intent.IntentModel import IntentModel

p = Preprocess(word2index_dic='../train_tools/dict/chatbot_dict.bin',
               userdic='../utils/user_dic.tsv')

# note: 'proprocess' is the keyword argument name as defined by IntentModel
intent = IntentModel(model_name='../models/intent/intent_model.h5', proprocess=p)

query = "오늘 탕수육 주문 가능한가요?"
predict = intent.predict_class(query)
predict_label = intent.labels[predict]

print(query)
print("의도 예측 클래스 : ", predict)        # predicted intent class
print("의도 예측 레이블 : ", predict_label)  # predicted intent label
from utils.Preprocess import Preprocess  # restored: the fragment uses Preprocess without importing it


# Parse the NER training file; the signature and sents list were missing from the
# fragment and are restored from the identical read_file elsewhere in this repo
def read_file(file_name):
    sents = []
    with open(file_name, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for idx, l in enumerate(lines):
            if l[0] == ';' and lines[idx + 1][0] == '$':
                this_sent = []
            elif l[0] == '$' and lines[idx - 1][0] == ';':
                continue
            elif l[0] == '\n':
                sents.append(this_sent)
            else:
                this_sent.append(tuple(l.split()))
    return sents


# Create the preprocessing object
p = Preprocess(word2index_dic='../../train_tools/dict/chatbot_dict.bin',
               userdic='../../utils/user_dic.tsv')

# Load the training corpus
corpus = read_file('ner_train.txt')

# Build the training set from only the words and BIO tags in the corpus
sentences, tags = [], []
for t in corpus:
    tagged_sentence = []
    sentence, bio_tag = [], []
    for w in t:
        tagged_sentence.append((w[1], w[3]))
        sentence.append(w[1])
        bio_tag.append(w[3])
    sentences.append(sentence)
    tags.append(bio_tag)  # assumed completion: the fragment ends before collecting the tags
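# A minimal sketch (not from this file) of one common next step: turning the BIO-tag
# lists into padded index sequences for sequence labeling. The tokenizer settings and
# the pad length of 40 are assumptions for illustration.
from tensorflow.keras import preprocessing

tag_tokenizer = preprocessing.text.Tokenizer(lower=False)  # keep BIO tags as-is
tag_tokenizer.fit_on_texts(tags)
tag_sequences = tag_tokenizer.texts_to_sequences(tags)
padded_tags = preprocessing.sequence.pad_sequences(tag_sequences, maxlen=40, padding='post')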