import sys


def main(argv):
    # Read key = value parameters from the file given as the first argument.
    param_fpath = sys.argv[1]
    param_dic = dict()
    with open(param_fpath) as f:
        line = f.readline().strip()
        while line != '':
            key, value = line.split('=', 1)
            param_dic[key.strip()] = value.strip()
            line = f.readline().strip()

    gen_method = param_dic['gen_method']
    corpus_word_path = param_dic['corpus_word_path']
    corpus_word_vector_path = param_dic['corpus_word_vector_path']
    utt_path_list = param_dic['utt_path_list'].split()
    output_fpath_list = param_dic['output_fpath_list'].split()
    task_cat_path = param_dic['task_cat_path']
    nontask_cat_path = param_dic['nontask_cat_path']

    # ----- STEP 1: generate sentence vectors & save to file -----
    # PARAM gen_method: sentence vector generation method, one of [mean | inner_prd]
    #   gen_method = 'mean'
    #   gen_method = 'inner_prd'
    # PARAM: server-generated corpus word & word vector file paths
    #   corpus_word_path = '../data/task_nontask_chat/ch2v_word_list'
    #   corpus_word_vector_path = '../data/task_nontask_chat/ch2v_vector_list'
    # PARAM: generate sentence vectors for the task-correct, task-error,
    #   nontask-correct, nontask-error and chat datasets
    #   task_utt_path_list = ['../data/utterances/task/task-correct.csv',
    #                         '../data/utterances/task/task-error.csv',
    #                         '../data/utterances/nontask/nontask-correct.csv',
    #                         '../data/utterances/nontask/nontask-error.csv',
    #                         '../data/chat/chatbot.txt']
    #   output_fpath_list = ['../output/task_correct_sentence_vector_list',
    #                        '../output/task_error_sentence_vector_list',
    #                        '../output/nontask_correct_sentence_vector_list',
    #                        '../output/nontask_error_sentence_vector_list',
    #                        '../output/chat_sentence_vector_list']

    # Generate one sentence vector file per utterance file.
    for task_utt_fpath, output_fpath in zip(utt_path_list, output_fpath_list):
        gen_sentvec_file(corpus_word_path, corpus_word_vector_path,
                         task_utt_fpath, output_fpath, gen_method)
    print '# Sentence vector generated #'

    # ----- STEP 2: run logistic regression -----
    # Each line in a category file corresponds to the same line in a vector file.
    # Category paths, e.g.
    #   task_cat_path = '../data/utterances/task/task-category.csv'
    #   nontask_cat_path = '../data/utterances/nontask/nontask-category.csv'
    # Vector paths: the files generated by gen_sentvec.py.
    senvec_fpath_dic = dict()
    senvec_fpath_dic['task_correct_vector_fpath'] = output_fpath_list[0]
    senvec_fpath_dic['task_error_vector_fpath'] = output_fpath_list[1]
    senvec_fpath_dic['nontask_correct_vector_fpath'] = output_fpath_list[2]
    senvec_fpath_dic['nontask_error_vector_fpath'] = output_fpath_list[3]
    senvec_fpath_dic['chat_vector_fpath'] = output_fpath_list[4]

    print 'LR result for correct dataset:'
    lr(task_cat_path, nontask_cat_path,
       senvec_fpath_dic['task_correct_vector_fpath'],
       senvec_fpath_dic['nontask_correct_vector_fpath'],
       senvec_fpath_dic['chat_vector_fpath'])

    print 'LR result for error dataset:'
    lr(task_cat_path, nontask_cat_path,
       senvec_fpath_dic['task_error_vector_fpath'],
       senvec_fpath_dic['nontask_error_vector_fpath'],
       senvec_fpath_dic['chat_vector_fpath'])
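# For reference, main() above parses a plain-text parameter file of key = value
# lines.  A minimal sketch of such a file, assembled from the example paths in
# the comments above (the actual file is not part of this snippet):
#
#   gen_method = mean
#   corpus_word_path = ../data/task_nontask_chat/ch2v_word_list
#   corpus_word_vector_path = ../data/task_nontask_chat/ch2v_vector_list
#   utt_path_list = ../data/utterances/task/task-correct.csv ../data/utterances/task/task-error.csv ../data/utterances/nontask/nontask-correct.csv ../data/utterances/nontask/nontask-error.csv ../data/chat/chatbot.txt
#   output_fpath_list = ../output/task_correct_sentence_vector_list ../output/task_error_sentence_vector_list ../output/nontask_correct_sentence_vector_list ../output/nontask_error_sentence_vector_list ../output/chat_sentence_vector_list
#   task_cat_path = ../data/utterances/task/task-category.csv
#   nontask_cat_path = ../data/utterances/nontask/nontask-category.csv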
def main():
    timers.start("dataload")
    train_pix, train_labels, test_pix, test_labels = load_train_data()
    timers.stop("dataload")

    # Pick the classifier named on the command line (default: "nn").
    method = find_argv("method", "nn")
    if method == "pybrain":
        import nn_pybrain
        nn_pybrain.nn(train_pix, train_labels, test_pix, test_labels)
    elif method == "nn":
        import nn
        nn.nn2(train_pix, train_labels, test_pix, test_labels)
    elif method == "lr":
        import lr
        lr.lr(train_pix, train_labels, test_pix, test_labels)
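# find_argv() is used above but not defined in this snippet.  A minimal sketch,
# assuming it looks for a "key=value" token in sys.argv and falls back to the
# given default -- a guess at the helper, not the original implementation:
import sys

def find_argv(key, default):
    # Scan command-line arguments for "key=value" and return the value part.
    for arg in sys.argv[1:]:
        if arg.startswith(key + "="):
            return arg.split("=", 1)[1]
    return default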
def trainModel(training_data, data_model, alpha=.001):
    '''Use logistic regression to train weights for all fields in the data model'''
    labels = training_data['label']
    examples = training_data['distances']

    (weight, bias) = lr.lr(labels, examples, alpha)

    fields = sorted(data_model['fields'].keys())
    for (i, name) in enumerate(fields):
        data_model['fields'][name]['weight'] = float(weight[i])

    data_model['bias'] = bias
    return data_model
def trainModel(training_data, data_model, alpha=0.001):
    """
    Use logistic regression to train weights for all fields in the data model
    """
    labels = training_data["label"]
    examples = training_data["distances"]

    (weight, bias) = lr.lr(labels, examples, alpha)

    for i, name in enumerate(data_model["fields"]):
        data_model["fields"][name]["weight"] = float(weight[i])

    data_model["bias"] = bias
    return data_model
def trainModel(training_data, data_model, alpha=.001):
    """
    Use logistic regression to train weights for all fields in the data model
    """
    labels = numpy.array(training_data['label'] == 'match', dtype='i4')
    examples = training_data['distances']

    (weight, bias) = lr.lr(labels, examples, alpha)

    for i, name in enumerate(data_model['fields']):
        data_model['fields'][name]['weight'] = float(weight[i])

    data_model['bias'] = bias
    return data_model
def trainModel(training_data, data_model, alpha=.001):
    labels = training_data['label']
    examples = training_data['field_distances']

    (weight, bias) = lr.lr(labels, examples, alpha)

    fields = sorted(data_model['fields'].keys())
    # weights = dict(zip(fields[0], weight))
    for i, name in enumerate(fields):
        data_model['fields'][name]['weight'] = float(weight[i])

    data_model['bias'] = bias
    return data_model
def trainModel(training_data, data_model, alpha=.001):
    (labels, fields, examples) = zip(*[(l, f, e) for (l, (f, e)) in training_data])

    labels = numpy.array(labels, dtype='i4')
    examples = numpy.array(examples, dtype='f4')

    (weight, bias) = lr.lr(labels, examples, alpha)

    weights = dict(zip(fields[0], weight))
    for name in data_model['fields']:
        data_model['fields'][name]['weight'] = float(weights[name])

    data_model['bias'] = bias
    return data_model
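# The trainModel() variants above all call lr.lr(labels, examples, alpha) and
# expect a (weight, bias) pair back.  A minimal sketch of such a function,
# assuming alpha is a learning rate and the fit is plain batch gradient descent
# on the logistic loss -- an illustration of the interface, not the project's
# actual lr module (where alpha might instead be a regularisation strength):
import numpy

def lr(labels, examples, alpha, n_iter=1000):
    # labels: (n,) array of 0/1 targets; examples: (n, d) feature matrix.
    X = numpy.asarray(examples, dtype='f8')
    y = numpy.asarray(labels, dtype='f8')
    weight = numpy.zeros(X.shape[1])
    bias = 0.0
    for _ in range(n_iter):
        pred = 1.0 / (1.0 + numpy.exp(-(X.dot(weight) + bias)))
        error = pred - y
        weight -= alpha * X.T.dot(error) / len(y)
        bias -= alpha * error.mean()
    return weight, bias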
import sys

from metric import printErrorMetrics
from rf import rf
from rr import rr
from nn import nn
from lr import lr

if __name__ == '__main__':
    extract_dir = sys.argv[1]
    fnum = int(sys.argv[2])

    # datasets and labels each have fnum entries
    datasets, labels = GetAllData(extract_dir, fnum, 'bfs', total_vertex_num=4900578, L=500000)
    # datasets, labels = GetAllData(extract_dir, fnum, 'bfs', total_vertex_num=65608366, L=10000000)

    # ridge regression
    sr, sl = rr(datasets, labels, fnum)
    # neural network
    sr, sl = nn(datasets, labels, fnum)
    # linear regression
    sr, sl = lr(datasets, labels, fnum)
    # random forest
    sr, sl = rf(datasets, labels, fnum)

    # draw picture (sr, sl hold the results of the last model run)
    sample_draw(sr, sl)
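# Each call above (rr, nn, lr, rf) takes (datasets, labels, fnum) and returns a
# pair (sr, sl) that is eventually handed to sample_draw().  A minimal sketch
# of one such function, assuming sr/sl are the fitted and true values of an
# ordinary least-squares fit -- the project's actual rr/nn/lr/rf modules are
# not shown here and may differ:
import numpy

def lr(datasets, labels, fnum):
    X = numpy.asarray(datasets, dtype='f8')
    y = numpy.asarray(labels, dtype='f8')
    # Append an intercept column and solve the least-squares problem y ~ X.
    A = numpy.hstack([X, numpy.ones((len(X), 1))])
    coef, _, _, _ = numpy.linalg.lstsq(A, y, rcond=None)
    sr = A.dot(coef)  # fitted values
    sl = y            # true labels
    return sr, sl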
import SVM
import lr
import Bayes
import LDA

LDA.LDA()
Bayes.Bayes()
SVM.svmwch()
lr.lr()
def __init__(self, labels, examples):
    # Fit logistic-regression weights and bias from the labelled examples;
    # the remaining positional arguments are hyperparameters passed to lr().
    self.weights, self.bias = lr(labels, examples, 0.01, 500, 5000, 0.01)