def main():
    """Extract features from the v1-v3 rating logs and pickle the results.

    Loads the object stored in ``dictionary_value.pkl``, merges the logs
    returned by ``readall.readall`` for the v1, v2 and v3 directories, and
    hands both to ``extract_word2vec_length``. The two values it returns are
    written to ``sent_100.pkl`` and ``length.pkl`` in the current directory.
    """
    # Pickle data is binary: open in 'rb'/'wb' and let ``with`` close the
    # handle instead of leaking it.
    with open('dictionary_value.pkl', 'rb') as f:
        dic = pickle.load(f)

    all_v1 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v1')
    all_v2 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v2')
    all_v3 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v3')

    # Merge in order so later versions win on duplicate keys, exactly like the
    # former dict(a.items() + b.items() + ...) but valid on Python 2 and 3.
    all_logs = {}
    for logs in (all_v1, all_v2, all_v3):
        all_logs.update(logs)

    sent, length = extract_word2vec_length(all_logs, dic)

    with open('sent_100.pkl', 'wb') as f:
        pickle.dump(sent, f)
    with open('length.pkl', 'wb') as f:
        pickle.dump(length, f)
def main():
    """Extract features from the v1-v3 rating logs and pickle the results.

    Steps:
      1. Unpickle ``dictionary_value.pkl``.
      2. Read the v1, v2 and v3 rating logs with ``readall.readall`` and
         merge them into one dict (later versions override earlier ones).
      3. Call ``extract_word2vec_length`` with the merged logs and the
         unpickled object.
      4. Pickle its two return values to ``sent_100.pkl`` and ``length.pkl``.
    """
    # Binary mode is what pickle expects; ``with`` guarantees the file is
    # closed even if unpickling fails.
    with open('dictionary_value.pkl', 'rb') as f:
        dic = pickle.load(f)

    all_v1 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v1')
    all_v2 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v2')
    all_v3 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v3')

    # update() in v1 -> v2 -> v3 order keeps the original precedence without
    # relying on Python 2's list-returning dict.items().
    all_logs = {}
    for logs in (all_v1, all_v2, all_v3):
        all_logs.update(logs)

    sent, length = extract_word2vec_length(all_logs, dic)

    with open('sent_100.pkl', 'wb') as f:
        pickle.dump(sent, f)
    with open('length.pkl', 'wb') as f:
        pickle.dump(length, f)
# Tally the "app_value" scores found in the v4 rating logs.
import readall

rating_logs = readall.readall("/home/ubuntu/zhou/Backend/rating_log/v4")
writelist = readall.get_log(rating_logs)

strategy_scan = ['init', 'end', 'more', 'switch', 'joke']
strategy_pre = ['continue', 'oov', 'short_answer', 'name_entity', 'no_repeat']

# One counter per score; "app_value" is used as a 1-based index below.
table_strategy_app = [0, 0, 0]
# TODO: per-strategy tallies are not filled in yet, so this prints all zeros.
table = {
    'init': [0, 0, 0],
    'end': [0, 0, 0],
    'more': [0, 0, 0],
    'switch': [0, 0, 0],
    'joke': [0, 0, 0],
}

# Walk the flattened entries exactly once. Wrapping this in a loop over
# rating_logs (whose loop variable was never used) counted every entry once
# per log and inflated all tallies.
for tmpdict in writelist:
    print(tmpdict["strategy"])
    index = int(tmpdict["app_value"]) - 1
    table_strategy_app[index] += 1

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(table_strategy_app)
print(table)
# Train a 100-dimensional word2vec model on the rating-log conversations and
# save the model, its vocabulary and the raw user inputs.
import gensim
import logging
import pickle
import readall
import nltk

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

all_v1 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v1')
all_v2 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v2')
all_v3 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v3')
all_v5 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v5')

# Merge in order so later versions win on duplicate keys; unlike
# dict(a.items() + b.items()), update() also works on Python 3.
all_logs = {}
for logs in (all_v1, all_v2, all_v3, all_v5):
    all_logs.update(logs)

sentences = []   # lower-cased, tokenised utterances from both speakers
user_input = []  # raw (untokenised) "You" utterances

for item in all_logs:
    conv = all_logs[item]["Turns"]
    for turn in conv:
        entry = conv[turn]
        sentences.append(nltk.word_tokenize(entry["You"].lower()))
        sentences.append(nltk.word_tokenize(entry["TickTock"].lower()))
        user_input.append(entry["You"])

# NOTE(review): `size` is the pre-4.0 gensim keyword (renamed `vector_size`
# in gensim 4) - kept as-is to match the gensim version this script targets.
model = gensim.models.Word2Vec(sentences, size=100, min_count=1)

# Every distinct token seen in the corpus.
dictionary = list({token for sentence in sentences for token in sentence})

model.save('/tmp/word2vec_100_break')

# Pickles are binary data: write in 'wb' and close the files deterministically.
with open('dictionary_conv.pkl', 'wb') as f:
    pickle.dump(dictionary, f)
with open('user_input_all.pkl', 'wb') as f:
    pickle.dump(user_input, f)
# Count how often each "app_value" score occurs in the v4 rating logs.
import readall

rating_logs = readall.readall("/home/ubuntu/zhou/Backend/rating_log/v4")
writelist = readall.get_log(rating_logs)

strategy_scan = ['init', 'end', 'more', 'switch', 'joke']
strategy_pre = ['continue', 'oov', 'short_answer', 'name_entity', 'no_repeat']

# Slot i holds the number of entries whose "app_value" equals i + 1.
table_strategy_app = [0, 0, 0]
# TODO: nothing populates the per-strategy table yet; it is printed as zeros.
table = {
    'init': [0, 0, 0],
    'end': [0, 0, 0],
    'more': [0, 0, 0],
    'switch': [0, 0, 0],
    'joke': [0, 0, 0],
}

# A single pass over writelist is enough: the former outer loop over
# rating_logs ignored its loop variable and merely repeated this pass once
# per log, multiplying the counts.
for tmpdict in writelist:
    print(tmpdict["strategy"])
    index = int(tmpdict["app_value"]) - 1
    table_strategy_app[index] += 1

# print(...) with one argument is valid and equivalent on Python 2 and 3.
print(table_strategy_app)
print(table)
import readall import gensim import nltk import numpy as np import pickle # we need to extract some features, now we make it easy now to just use the word2vec, one turn previous turn. # model = gensim.models.Word2Vec.load('/tmp/word2vec_50_break') all_v1 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v1') all_v2 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v2') all_v3 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v3') all_logs = dict(all_v1.items() + all_v2.items() + all_v3.items()) sent_vec = None for item in all_logs: print item conv = all_logs[item]["Turns"] sent_pre = None for turn in conv: turn_vec_1 = sum(model[nltk.word_tokenize(conv[turn]["You"])]) if len(nltk.word_tokenize(conv[turn]["TickTock"])) == 0: continue #print 'TickTock' #print conv[turn]["TickTock"] turn_vec_2 = sum(model[nltk.word_tokenize(conv[turn]["TickTock"])]) #print turn_vec_1 #print turn_vec_2 if sent_vec is None: sent_vec = turn_vec_1 + turn_vec_2 + turn_vec_1 + turn_vec_2 target = np.array(int(conv[turn]["Appropriateness"])) else: