def generate(self, input_file, output_file):
    """
    Run beam search on every line of ``input_file`` and log the hypotheses.

    Each stripped input line is echoed to stdout and to ``output_file``,
    followed by one ``result: <text>, score: <score>.`` line per hypothesis
    returned by ``beam_search``.

    :param input_file: path of the text file to read; it is decoded with
        ``config.globalCharSet()``.
    :param output_file: path of the byte-mode file that receives the log.
    """
    charset = config.globalCharSet()
    deploy_model = self.model.get_deploy_function()
    with open(output_file, 'w') as fw:
        with codecs.open(input_file, 'r', charset) as fo:
            for line in fo.readlines():
                line = line.strip()
                encoded_line = line.encode(charset)
                print(encoded_line)
                fw.write('%s\n' % encoded_line)
                res, score = beam_search(line, self.cr, deploy_model,
                                         beam_size=200, search_scope=200)
                print(res)
                res = [' '.join(self.cr.transform_input_text(s)) for s in res]
                for r, s in zip(res, score):
                    # fw is a byte-mode file: encode unicode text explicitly
                    # (as the sibling generate variants do) instead of
                    # relying on the implicit ASCII conversion.
                    if isinstance(r, unicode):
                        r = r.encode(charset)
                    message = 'result: %s, score: %f.' % (r, s)
                    print(message)
                    fw.write(message + '\n')
def style_chaos(self, input_file, output_file):
    """
    Compute the chaos (entropy) of the style distribution of each question.

    For every stripped line of ``input_file`` the style distribution is
    obtained from the model's media-data function, and
    ``sum(-p * log(p))`` over its first row is printed and written to
    ``output_file + 'chaos'`` as ``<line>\\t<chaos>\\t<distribution>``.

    :param input_file: path of the text file to read; it is decoded with
        ``config.globalCharSet()``.
    :param output_file: prefix of the output path; ``'chaos'`` is appended.
    """
    charset = config.globalCharSet()
    get_media_data_function = self.model.get_media_data_function()
    with codecs.open(input_file, 'r', charset) as fo:
        with open(output_file + 'chaos', 'w') as fw:
            for line in fo.readlines():
                line = line.strip()
                encoded_line = line.encode(charset)
                print(encoded_line)
                fw.write('%s\t' % encoded_line)
                (question, question_mask) = self.cr.transform_input_data(line)
                # The last row of both arrays is dropped before the model call.
                question = question[:-1]
                question_mask = question_mask[:-1]
                _, style_distribution = get_media_data_function(question, question_mask)
                st = style_distribution.tolist()[0]
                # A zero probability contributes nothing to the entropy;
                # skipping it avoids the ValueError math.log(0) would raise.
                chaos = sum(-p * math.log(p) for p in st if p != 0)
                print('%s \t%s' % (chaos, str(st)))
                output = '%f\t%s\n' % (chaos, str(st))
                fw.write(output.encode(charset))
def generate(self, input_file, output_file):
    """
    Beam-search candidates for each input line and rerank them.

    Every candidate gets two scores: the beam-search score and the first
    value returned by the reverse model's evaluation function. Candidates
    are printed in descending order of the sum of both scores. Only the
    input line and a blank separator line are written to ``output_file``;
    the ranked results go to stdout.

    :param input_file: path of the text file to read; it is decoded with
        ``config.globalCharSet()``.
    :param output_file: path of the byte-mode file to write.
    """
    forward_fn = self.model.get_deploy_function()
    backward_fn = self.reverse_model.get_evaluation_function()
    charset = config.globalCharSet()
    with open(output_file, 'w') as sink, \
            codecs.open(input_file, 'r', charset) as source:
        for raw in source.readlines():
            line = raw.strip()
            encoded = line.encode(charset)
            print(encoded)
            sink.write('%s\n' % encoded)

            hypotheses, scores = beam_search(line, self.cr, forward_fn,
                                             beam_size=10, search_scope=10,
                                             output_size=20)
            print(hypotheses)
            # The first and last element of each hypothesis are dropped
            # before converting it to text.
            texts = [' '.join(self.cr.transform_input_text(h[1:-1]))
                     for h in hypotheses]

            reranked = []
            for text, forward_score in zip(texts, scores):
                question, question_mask = self.cr.transform_input_data(text)
                answer, answer_mask = self.cr.transform_input_data(line)
                # The last row of the candidate is moved to the front of
                # the original line before evaluation.
                answer = np.concatenate([question[-1:], answer], axis=0)
                answer_mask = np.concatenate([question_mask[-1:], answer_mask], axis=0)
                sae, _, _ = backward_fn(question[:-1, :], question_mask[:-1, :],
                                        answer, answer_mask)
                reranked.append([text, forward_score, sae])

            reranked.sort(key=lambda item: item[1] + item[2], reverse=True)
            for text, forward_score, backward_score in reranked:
                print('result: %s, score: %f, %f'
                      % (text.encode(charset), forward_score, backward_score))
            sink.write('\n')
def generate(self, input_file, output_file):
    """
    Beam-search each input line and dump the observe function's output.

    For the first five hypotheses of every line, the model's observe
    function is evaluated and the values ``alpha[row, col, 0, 0]`` are
    printed one row per line. The input line and the five
    ``result: <text>, score: <score>`` lines are written to
    ``output_file``, followed by a blank line.

    :param input_file: path of the text file to read; it is decoded with
        ``config.globalCharSet()``.
    :param output_file: path of the byte-mode file to write.
    """
    charset = config.globalCharSet()
    deploy_model = self.model.get_deploy_function()
    observe_model = self.model.get_observe_function()
    with open(output_file, 'w') as fw:
        with codecs.open(input_file, 'r', charset) as fo:
            for line in fo.readlines():
                line = line.strip()
                encoded_line = line.encode(charset)
                print(encoded_line)
                fw.write('%s\n' % encoded_line)
                sentence, score = beam_search(line, self.cr, deploy_model,
                                              beam_size=50, search_scope=50)
                print(sentence)
                res = [' '.join(self.cr.transform_input_text(s)) for s in sentence]
                # The question encoding depends only on the input line, so
                # it is computed once instead of once per hypothesis.
                (question, question_mask) = self.cr.transform_input_data(line)
                question = question[:-1, :]
                question_mask = question_mask[:-1, :]
                # list() keeps the slice valid where zip() returns an iterator.
                for r, st, s in list(zip(res, sentence, score))[0:5]:
                    # One single-element row per hypothesis element, with an
                    # all-ones mask of the same length.
                    tanswer = [[i] for i in st]
                    tanswer_mask = [[1] for _ in st]
                    [alpha] = observe_model(question, question_mask,
                                            tanswer, tanswer_mask)
                    message = 'result: %s, score: %f' % (r.encode(charset), s)
                    print(message)
                    for row in range(alpha.shape[0]):
                        print(' '.join(str(alpha[row, col, 0, 0])
                                       for col in range(alpha.shape[1])))
                    fw.write(message + '\n')
                fw.write('\n')
def generate(self, input_file, output_file):
    """
    Generate a model with style modeling.

    For each input line the ``k`` most probable topics are combined with
    every style, the ``k`` best (topic, style) pairs by
    ``topic probability * style probability`` are decoded with beam search,
    and each hypothesis score is offset by ``-log(tp * sp)``. The five
    hypotheses with the smallest resulting score are printed and written to
    ``output_file``.

    :param input_file: path of the text file to read; it is decoded with
        ``config.globalCharSet()``.
    :param output_file: path of the byte-mode file to write.
    """
    k = 10
    charset = config.globalCharSet()
    topic_distribution_function = self.model.get_topic_distribution_function()
    style_distribution_function = self.model.get_style_distribution_function()
    deploy_model = self.model.get_deploy_function()
    style_number = self.conf_dict['n_style']
    with codecs.open(input_file, 'r', charset) as fo:
        with open(output_file, 'w') as fw:
            for line in fo.readlines():
                line = line.strip()
                encoded_line = line.encode(charset)
                print(encoded_line)
                fw.write('%s\n' % encoded_line)
                (question, question_mask) = self.cr.transform_input_data(line)
                question = question[:-1]
                question_mask = question_mask[:-1]
                media_data, topic_distribution = \
                    topic_distribution_function(question, question_mask)
                sorted_topics = sorted(enumerate(topic_distribution[0]),
                                       key=lambda x: x[1], reverse=True)
                all_prob = list()
                all_res = list()
                for topic, prob in sorted_topics[0:k]:
                    # The style distribution depends on the topic only, so
                    # it is evaluated once per topic rather than once per
                    # (topic, style) pair.
                    style_distribution = style_distribution_function(
                        question, question_mask,
                        numpy.array([topic], dtype='int64'))[0]
                    for style in range(style_number):
                        all_prob.append([topic, style, prob,
                                         style_distribution[0][style]])
                best_pairs = sorted(all_prob, key=lambda x: x[2] * x[3],
                                    reverse=True)[0:k]
                for topic, style, tp, sp in best_pairs:
                    # The closure is consumed by beam_search within this
                    # iteration, so it sees the current topic and style.
                    def distribution_calculate(question, question_mask, answer, answer_mask):
                        topic_vector = numpy.concatenate(
                            [numpy.array([topic], dtype='int64')] * question.shape[1],
                            axis=0)
                        return deploy_model(question, question_mask, answer, answer_mask,
                                            media_data, topic_vector, style)
                    res, score = beam_search(line, self.cr, distribution_calculate,
                                             beam_size=5, search_scope=5)
                    all_res.extend([r, s - math.log(tp * sp)]
                                   for r, s in zip(res, score))
                all_res = sorted(all_res, key=lambda x: x[1], reverse=False)
                print(all_res)
                res = [(' '.join(self.cr.transform_input_text(s[0])), s[1])
                       for s in all_res[0:5]]
                for r, s in res:
                    message = 'result: %s, score: %f' % (r.encode(charset), s)
                    print(message)
                    fw.write(message + '\n')
                fw.write('\n')
def generate_emb(self, input_file, output_file):
    """
    Append the encoder vector of the first column to every input record.

    Each stripped line of ``input_file`` is split on tabs; the first field
    is passed through the model's encoder-vector function, and the line is
    written to ``output_file`` followed by a tab and the space-separated
    values of the first row of the result.

    :param input_file: path of the tab-separated file to read.
    :param output_file: path of the file to write; both files use
        ``config.globalCharSet()``.
    """
    encode_question = self.model.get_encoder_vector()
    charset = config.globalCharSet()
    with codecs.open(output_file, 'w', charset) as sink, \
            codecs.open(input_file, 'r', charset) as source:
        for raw in source.readlines():
            record = raw.strip()
            fields = record.split('\t')
            question, question_mask = self.cr.transform_input_data(fields[0])
            vector = encode_question(question, question_mask)
            rendered = ' '.join([str(value) for value in vector[0]])
            sink.write(record + '\t' + rendered + '\n')
def generate(self, input_file, output_file):
    """
    Print the deploy function's first output row for every input line.

    Each stripped line is echoed to stdout and to ``output_file``; the
    values of ``deploy_model(question, question_mask)[0][0]`` are printed
    space-separated to stdout, and a blank line is written to the file.

    :param input_file: path of the text file to read; it is decoded with
        ``config.globalCharSet()``.
    :param output_file: path of the byte-mode file to write.
    """
    encoder_fn = self.model.get_deploy_function()
    charset = config.globalCharSet()
    with open(output_file, 'w') as sink, \
            codecs.open(input_file, 'r', charset) as source:
        for raw in source.readlines():
            line = raw.strip()
            encoded = line.encode(charset)
            print(encoded)
            sink.write('%s\n' % encoded)
            question, question_mask = self.cr.transform_input_data(line)
            hidden_states = encoder_fn(question, question_mask)[0][0]
            rendered = ' '.join([str(value) for value in hidden_states])
            print('result: %s' % (rendered))
            sink.write('\n')
def generate_one_question(self, question, media_function, deploy_function,
                          output_size=50, n_chosen_style=2):
    """
    Generate answers for one question with style modeling.

    Beam search is run once per style; every hypothesis score is offset by
    ``-log(style probability)``. The candidates of the ``n_chosen_style``
    most probable styles are merged, sorted by ascending score and
    de-duplicated.

    :param question: the question text (must support ``.encode``).
    :param media_function: callable returning
        ``(media_data, style_distribution)`` for the encoded question.
    :param deploy_function: callable used by ``beam_search`` through a
        closure that adds ``media_data`` and the style index.
    :param output_size: maximum number of distinct answers to return.
    :param n_chosen_style: number of top styles whose candidates are merged.
    :return: ``(style_candidate_list, answer_list)`` where
        ``style_candidate_list[i]`` holds the ``(text, score)`` candidates
        of style ``i`` and ``answer_list`` the merged ``(text, score)``
        answers.
    """
    style_number = self.conf_dict['n_style']
    # One independent bucket per style. ``[[]] * n`` would make every slot
    # refer to the same list, mixing the candidates of all styles.
    style_candidate_list = [[] for _ in range(style_number)]
    print(question.encode(config.globalCharSet()))
    (question0, question_mask0) = self.cr.transform_input_data(question)
    question0 = question0[:-1]
    question_mask0 = question_mask0[:-1]
    media_data, style_distribution = media_function(question0, question_mask0)
    print(style_distribution)
    style_score = style_distribution[0]
    # Ascending -log(p) is descending probability.
    style_sorted_index = sorted(
        range(style_number),
        key=lambda x: -math.log(style_score[x]))[:n_chosen_style]
    for style in range(style_number):
        # Consumed by beam_search within this iteration, so the closure
        # sees the current style.
        def distribution_calculate(question, question_mask, answer, answer_mask):
            return deploy_function(question, question_mask, answer, answer_mask,
                                   media_data, style)
        res, score = beam_search(question, self.cr, distribution_calculate,
                                 beam_size=200, search_scope=200, output_size=5)
        print(res)
        res = [' '.join(self.cr.transform_input_text(s)) for s in res[0:5]]
        for r, s in zip(res, score):
            style_candidate_list[style].append(
                (r, s - math.log(style_score[style])))
    res_list = []
    for style_index in style_sorted_index:
        res_list += style_candidate_list[style_index]
    res_list = sorted(res_list, key=lambda x: x[1])
    answer_list = []
    answer_set = set()
    for answer, score in res_list:
        if len(answer_list) >= output_size:
            break
        if answer not in answer_set:
            answer_list.append((answer, score))
            answer_set.add(answer)
    return style_candidate_list, answer_list
def generate(self, input_file, output_file):
    """
    Answer every input line via ``generate_one_question`` and log results.

    The stripped line, one ``result: <text>, score: <score>`` line per
    returned pair and a blank separator are written to ``output_file``;
    the same lines (without the separator) are printed to stdout.

    :param input_file: path of the text file to read; it is decoded with
        ``config.globalCharSet()``.
    :param output_file: path of the byte-mode file to write.
    """
    deploy_model = self.model.get_deploy_function()
    charset = config.globalCharSet()
    with open(output_file, 'w') as sink, \
            codecs.open(input_file, 'r', charset) as source:
        for raw in source.readlines():
            line = raw.strip()
            encoded = line.encode(charset)
            print(encoded)
            sink.write('%s\n' % encoded)
            texts, scores = self.generate_one_question(line, deploy_model,
                                                       output_size=5)
            for text, value in zip(texts, scores):
                message = 'result: %s, score: %f' % (text.encode(charset), value)
                print(message)
                sink.write(message + '\n')
            sink.write('\n')
def generate_b_v(self, input_file, output_file):
    """
    Answer every input line via ``generate_one_question_b_v``.

    The stripped line and one ``result: <text>`` line per returned answer
    are written to ``output_file`` and printed to stdout.

    :param input_file: path of the text file to read.
    :param output_file: path of the file to write; both files use
        ``config.globalCharSet()``.
    """
    charset = config.globalCharSet()
    deploy_model = self.model.get_deploy_function()
    with codecs.open(output_file, 'w', charset) as fw:
        with codecs.open(input_file, 'r', charset) as fo:
            for line in fo.readlines():
                line = line.strip()
                print(line.encode(charset))
                fw.write('%s\n' % line)
                toReturn = self.generate_one_question_b_v(line, deploy_model)
                # A None result (no surviving candidate) is treated as an
                # empty answer list instead of failing on iteration.
                if toReturn is None:
                    toReturn = ()
                for r in toReturn:
                    message = 'result: %s' % (r)
                    print(message)
                    fw.write(message + '\n')
def generate(self, input_file, output_file):
    """
    Answer every input line via ``generate_one_question`` and log results.

    The stripped line, one ``result: <text>, score: <score>`` line per
    returned pair and a blank separator are written to ``output_file``;
    the same lines (without the separator) are printed to stdout.

    :param input_file: path of the text file to read; it is decoded with
        ``config.globalCharSet()``.
    :param output_file: path of the byte-mode file to write.
    """
    charset = config.globalCharSet()
    deploy_model = self.model.get_deploy_function()
    with open(output_file, 'w') as fw:
        with codecs.open(input_file, 'r', charset) as fo:
            for line in fo.readlines():
                line = line.strip()
                encoded_line = line.encode(charset)
                print(encoded_line)
                fw.write('%s\n' % encoded_line)
                res, score = self.generate_one_question(
                    line, deploy_model, output_size=5,
                    beam_size=100, search_scope=100)
                for r, s in zip(res, score):
                    message = 'result: %s, score: %f' % (r.encode(charset), s)
                    print(message)
                    # Terminate every result line; without the newline all
                    # results of a question run together in the file.
                    fw.write(message + '\n')
                fw.write('\n')
def generate(self, input_file, output_file):
    """
    Generate a model with style modeling, one output file per style.

    For each style index ``i`` the whole input file is processed again and
    the log is written to ``output_file + str(i)``: the input line, the
    probability of style ``i`` and the first five beam-search hypotheses
    decoded with that style.

    :param input_file: path of the text file to read; it is decoded with
        ``config.globalCharSet()``.
    :param output_file: prefix of the byte-mode output paths.
    """
    media_function = self.model.get_media_data_function()
    deploy_function = self.model.get_deploy_function()
    style_number = self.conf_dict['n_style']
    charset = config.globalCharSet()
    for style in range(style_number):
        with codecs.open(input_file, 'r', charset) as source, \
                open(output_file + str(style), 'w') as sink:
            for raw in source.readlines():
                line = raw.strip()
                encoded = line.encode(charset)
                print(encoded)
                sink.write('%s\n' % encoded)

                question, question_mask = self.cr.transform_input_data(line)
                media_data, style_distribution = media_function(
                    question[:-1], question_mask[:-1])
                print(style_distribution)
                style_message = 'style number : %d, score: %f' % (
                    style, style_distribution[0][style])
                print(style_message)
                sink.write(style_message + '\n')

                # Adapter giving beam_search the four-argument callable it
                # is passed, with media data and style supplied by closure.
                def distribution_calculate(question, question_mask, answer, answer_mask):
                    return deploy_function(question, question_mask, answer, answer_mask,
                                           media_data, style)

                hypotheses, scores = beam_search(line, self.cr, distribution_calculate,
                                                 beam_size=200, search_scope=200)
                print(hypotheses)
                texts = [' '.join(self.cr.transform_input_text(h))
                         for h in hypotheses[0:5]]
                for text, value in zip(texts, scores):
                    message = 'result: %s, score: %f' % (text.encode(charset), value)
                    print(message)
                    sink.write(message + '\n')
                sink.write('\n')
def generate_b_v_t_c(self, input_file, output_file):
    """
    Report how often the classifier output equals the label column.

    Every stripped line of ``input_file`` is split on tabs; the first field
    is classified with ``self.model.classification_deploy()`` and the
    result ``qa_cost[0][0]`` is compared with the third field parsed as a
    float. ``output_file``, the match count, the line count and their
    ratio are printed; nothing is written to ``output_file``.

    :param input_file: path of the tab-separated file to read; it is
        decoded with ``config.globalCharSet()``.
    :param output_file: only printed as a label for the report.
    """
    get_cost = self.model.classification_deploy()
    total_num = 0
    # Kept as a float so the printed count has one format and the ratio
    # below is a true division.
    true_num = 0.0
    with codecs.open(input_file, 'r', config.globalCharSet()) as fo:
        for line in fo.readlines():
            lines = line.strip().split('\t')
            (question, question_mask) = self.cr.transform_input_data(lines[0])
            qa_cost = get_cost(question, question_mask)
            total_num += 1
            if float(lines[2]) == qa_cost[0][0]:
                true_num += 1.0
    print(output_file)
    print(true_num)
    print(total_num)
    # The ratio is undefined for an empty input file; report NaN instead
    # of raising ZeroDivisionError.
    if total_num:
        print(true_num / total_num)
    else:
        print(float('nan'))
def generate_one_question_b_v(self, question, deploy_model):
    """
    Beam-search answers for one question and filter them.

    Hypotheses are sorted by ascending score and dropped when they are

    * at most three characters long, shorter than the question and
      contained in it (``continue1``),
    * rated ``-1`` by ``isMakeSense`` or contain one of two marker
      strings (``continue2``),
    * rated ``<= 0`` while the question is still considered sensible
      (``continue3``).

    The question stops counting as sensible when at most one of the five
    best hypotheses is rated ``1``.

    :param question: the question text.
    :param deploy_model: callable handed to ``beam_search``.
    :return: a tuple with the texts of the first five surviving
        hypotheses, or ``None`` when none survives.
    """
    question_make_sense = isMakeSense(question)
    res, score = beam_search(question, self.cr, deploy_model,
                             beam_size=100, search_scope=100, output_size=50)
    print(res)
    res = [' '.join(self.cr.transform_input_text(s)) for s in res]
    # (text, beam score, ranking score); the ranking score is the beam
    # score itself.
    resorted_list = [(r, s, s) for r, s in zip(res, score)]
    if len(question) > 3:
        resorted_list = sorted(resorted_list, key=lambda x: x[2] / len(question))
    else:
        resorted_list = sorted(resorted_list, key=lambda x: x[2])

    if question_make_sense == 1:
        f = 0
        for r, _, _ in resorted_list[:5]:
            ori_sentence = r.replace(u'<END>', u'').replace(u' ', u'')
            if isMakeSense(ori_sentence) == 1:
                f += 1
        if f <= 1:
            question_make_sense = 0

    candidates = list()
    # Debug trace: the fields of each hypothesis share one output line with
    # the skip tag that follows them; accepted hypotheses add no line
    # break, so their fields stay on the line until the next tag.
    trace = []
    for r, s1, s2 in resorted_list:
        ori_sentence = r.strip().replace(u'<END>', u'').replace(u' ', u'')
        answer_make_sense = isMakeSense(ori_sentence)
        r0 = r
        if isinstance(r, unicode):
            r0 = r.encode(config.globalCharSet())
        trace.append('%s %s %s %s' % (r0, s1, s2, answer_make_sense))
        skip_reason = None
        if len(ori_sentence) <= 3 \
                and len(ori_sentence) < len(question) and ori_sentence in question:
            skip_reason = 'continue1'
        # NOTE(review): the first marker literal looks mis-encoded - confirm
        # the intended text against the original file encoding.
        elif answer_make_sense == -1 or u'ϵͳ' in ori_sentence or u'NUM' in ori_sentence:
            skip_reason = 'continue2'
        elif question_make_sense == 1 and answer_make_sense <= 0:
            skip_reason = 'continue3'
        if skip_reason is not None:
            trace.append(skip_reason)
            print(' '.join(trace))
            trace = []
            continue
        candidates.append((r, s2))
    trace.append('variousen')
    print(' '.join(trace))

    output_size = 5
    if len(candidates) == 0:
        return None
    candidates, _ = zip(*candidates)
    return candidates[:output_size]
def generate_b_v_t_g(self, input_file, output_file):
    """
    Beam-search every input line with ``beam_search_t`` and log the picks.

    Each stripped line is split on tabs; a three-column line is reduced to
    its first and third column. The (possibly reduced) line is written to
    ``output_file`` and decoded. The last element of every hypothesis is
    split off and appended to the hypothesis text after a tab. Hypotheses
    are sorted by ascending score and filtered; up to ``output_size`` of
    them, chosen through ``variousen_strings``, are printed and written as
    ``result: <text>, score: <score>.`` lines.

    :param input_file: path of the tab-separated file to read.
    :param output_file: path of the file to write; both files use
        ``config.globalCharSet()``.
    """
    deploy_model = self.model.get_deploy_function()
    print output_file
    print 'generate_b_v_t_g'
    with codecs.open(output_file, 'w', config.globalCharSet()) as fw:
        with codecs.open(input_file, 'r', config.globalCharSet()) as fo:
            for line in fo.readlines() :
                line = line.strip()
                lines = line.strip().split('\t')
                # The isMakeSense check on the question is disabled; the
                # question is always treated as sensible.
                question_make_sense = 1  # isMakeSense(line)
                print (line.encode(config.globalCharSet()))
                # Three-column input: keep only column 0 and column 2.
                if(len(lines)==3):
                    line=''
                    line+=lines[0]
                    line+='\t'
                    line+=lines[2]
                fw.writelines('%s\n' % line)
                res, score = beam_search_t(line, self.cr, deploy_model, beam_size=200, search_scope=100)
                print res
                # Separate the last element of each hypothesis from the
                # rest; only the rest is converted to text.
                res1= [s[:-1] for s in res]
                res2= [s[-1] for s in res]
                res = [' '.join(self.cr.transform_input_text(s)) for s in res1]
                # Re-attach the split-off element as a tab-separated suffix.
                for res_len in range(len(res)):
                    res[res_len]+='\t'
                    res[res_len]+=str(res2[res_len])
                resorted_list = list()
                for r, s in zip(res, score):
                    # The idf sum and idf_revise are computed but not used:
                    # the tuple appended below carries the score twice.
                    idf = 0.0
                    tokens = r.split(u' ')
                    for token in tokens[1:-1]:
                        idf += get_idf(token)
                    idf_revise = 4 * np.tanh(4 * idf)
                    resorted_list.append((r, s, s))
                # Ascending sort on the score, divided by the line length
                # for lines longer than three characters.
                if len(line) > 3:
                    resorted_list = sorted(resorted_list, key=lambda x:x[2] / len(line) ** 1)
                else:
                    resorted_list = sorted(resorted_list, key=lambda x:x[2])
                candidates = list()
                if question_make_sense == 1:
                    f = 0
                    for r, _, _ in resorted_list[:5]:
                        ori_sentence = r.replace(u'<END>', u'').replace(u' ', u'')
                        # With the isMakeSense check replaced by a constant,
                        # f counts the entries of resorted_list[:5].
                        if 1:
                            f += 1
                    if f <= 1:
                        question_make_sense = 0
                for r, s1, s2 in resorted_list:
                    ori_sentence = r.strip().replace(u'<END>', u'')
                    ori_sentence = ori_sentence.replace(u' ', u'')
                    # Constant 1: the 'continue2' and 'continue3' branches
                    # below cannot trigger while this stays hard-coded.
                    answer_make_sense = 1  # isMakeSense(ori_sentence)
                    r0 = r
                    if isinstance(r, unicode) :
                        r0 = r.encode(config.globalCharSet())
                    print r0, s1, s2, answer_make_sense
                    # Drop very short answers that merely repeat part of
                    # the input line.
                    if len(ori_sentence) <= 3 \
                            and len(ori_sentence) < len(line) and ori_sentence in line:
                        print 'continue1'
                        continue
                    if answer_make_sense == -1:
                        print 'continue2'
                        continue
                    if question_make_sense == 1 and answer_make_sense <= 0:
                        print 'continue3'
                        continue
                    candidates.append((r, s2))
                print 'variousen'
                variousen_scope = 15
                output_size = 5
                high_fruq_left = 5
                # Indices 0..high_fruq_left-1 come first, followed by the
                # indices returned by variousen_strings.
                v_index = variousen_strings(candidates[:variousen_scope], output_size)
                v_index = range(min(len(candidates), high_fruq_left)) + v_index
                # Order-preserving de-duplication of the index list.
                func = lambda x, y:x if y in x else x + [y]
                v_index = reduce(func, [[], ] + v_index)
                toReturn = [candidates[i] for i in v_index[:output_size]]
                for r, s in toReturn :
                    print ('result: %s, score: %f.' % (r.encode(config.globalCharSet()), s))
                    fw.writelines('result: %s, score: %f.\n' % (r, s))
'''
def generate_b_v_t(self, input_file, output_file):
    """
    Decode every input record with ``beam_search_t`` and keep the best hit.

    Each stripped line is split on tabs; columns 0, 2 and 3 form the query
    handed to ``beam_search_t``. The original line, the text of the first
    hypothesis (without its first and last element) and its score are
    written tab-separated to ``output_file``. The elapsed time is printed
    at the end.

    :param input_file: path of the tab-separated file to read; lines with
        fewer than four columns are reported and skipped.
    :param output_file: path of the file to write; both files use
        ``config.globalCharSet()``.
    """
    charset = config.globalCharSet()
    # Answer samples handed to beam_search_t as a dict keyed by the string
    # form of each sample; the sample list is currently empty.
    answer_set = []
    answer_dict = {}
    for answer_sample in answer_set:
        answer_dict[str(list(answer_sample))] = 1
    deploy_model = self.model.get_deploy_function()
    print(output_file)
    start = time.clock()
    with codecs.open(output_file, 'w', charset) as fw:
        with codecs.open(input_file, 'r', charset) as fo:
            for line in fo.readlines():
                line = line.strip()
                lines = line.split('\t')
                if len(lines) < 4:
                    # Columns 0, 2 and 3 are required; skip short or blank
                    # lines instead of failing with IndexError.
                    print('skip malformed line: %s' % line.encode(charset))
                    continue
                line1 = lines[0] + '\t' + lines[2] + '\t' + lines[3] + '\n'
                res, score = beam_search_t(line1, self.cr, deploy_model, answer_dict,
                                           beam_size=10, search_scope=10)
                if len(res) <= 0:
                    print('not find')
                    continue
                best = ' '.join(self.cr.transform_input_text(res[0][1:-1]))
                try:
                    fw.write(line + '\t' + best + '\t' + str(score[0]) + '\n')
                except Exception:
                    # Best effort: dump the raw hypotheses of a record that
                    # cannot be written and carry on. Unlike a bare except,
                    # this lets KeyboardInterrupt and SystemExit propagate.
                    print(res)
    end = time.clock()
    print("read: %f s" % (end - start))
if __name__ == '__main__': base_path = os.path.join(os.getcwd(), 'data') dataset_folder = os.path.join(base_path, sys.argv[1]) dataset_file = os.path.join(base_path, sys.argv[2]) dict_file = os.path.join(base_path, sys.argv[3]) stopwords_file = os.path.join(base_path, sys.argv[4]) word_embedding_file = os.path.join(base_path, sys.argv[5]) train_rate = string.atof(sys.argv[6]) valid_rate = string.atof(sys.argv[7]) test_rate = string.atof(sys.argv[8]) algo_name = sys.argv[9] mode = sys.argv[10] charset = config.globalCharSet() print ('dataset file: %s.' % (dataset_file)) print ('dict file: %s.' % (dict_file)) print ('stopwords file: %s.' % (stopwords_file)) print ('word embedding file: %s.' % (word_embedding_file)) print ('algorithms name: %s.' % (algo_name)) print ('mode: %s.' % (mode)) print ('charset: %s.' % (charset)) if algo_name == 'SeqToSeq' : from deep.manage.model.seq_to_seq import RnnEncoderDecoder manager = RnnEncoderDecoder(dataset_folder, dataset_file, dict_file, stopwords_file, word_embedding_file, train_rate, valid_rate, test_rate, algo_name, charset, mode) elif algo_name == 'ChoEncoderDecoder' : from deep.manage.model.cho_encoder_decoder import RnnEncoderDecoder manager = RnnEncoderDecoder(dataset_folder, dataset_file, dict_file, stopwords_file, word_embedding_file,
dict_file = sys.argv[3] stopwords_file = sys.argv[4] word_embedding_file = sys.argv[5] ''' dataset_folder = os.path.join(base_path, sys.argv[1]) dataset_file = os.path.join(base_path, sys.argv[2]) dict_file = os.path.join(base_path, sys.argv[3]) stopwords_file = os.path.join(base_path, sys.argv[4]) word_embedding_file = os.path.join(base_path, sys.argv[5]) ''' train_rate = string.atof(sys.argv[6]) valid_rate = string.atof(sys.argv[7]) test_rate = string.atof(sys.argv[8]) algo_name = sys.argv[9] mode = sys.argv[10] charset = config.globalCharSet() print('dataset file: %s.' % (dataset_file)) print('dict file: %s.' % (dict_file)) print('stopwords file: %s.' % (stopwords_file)) print('word embedding file: %s.' % (word_embedding_file)) print('algorithms name: %s.' % (algo_name)) print('mode: %s.' % (mode)) print('charset: %s.' % (charset)) if algo_name == 'SeqToSeq': from deep.manage.model.seq_to_seq import RnnEncoderDecoder manager = RnnEncoderDecoder(dataset_folder, dataset_file, dict_file, stopwords_file, word_embedding_file, train_rate, valid_rate, test_rate, algo_name, charset, mode) elif algo_name == 'ChoEncoderDecoder':