def Testing(directory, m1, feature_space):
    """Vectorize every line of every file in *directory* and evaluate model *m1*.

    Each line is turned into a feature vector (bag-of-features from
    *feature_space* plus emotion-word counts), labelled via the module-level
    ``test_label_map`` keyed by filename, and fed to libsvm's ``predict``.

    Returns ``[m, p_acc, p_vals]`` exactly as produced by ``predict``:
    predicted labels, accuracy tuple, and decision/probability values.
    """
    vectors = []
    labels = []

    # First pass: total line count, used only to size the progress bar.
    count = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            count += len(of.readlines())

    widgets = ['Test_Vectorize: ', Percentage(), ' ',
               Bar(marker='0', left='[', right=']'), ' ',
               ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()

    # Second pass: build one vector + one label per line.
    curCount = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            for line in of:
                vector = vectorize(feature_space, line)
                # '1000' selects a particular emotion-word counting mode
                # -- NOTE(review): semantics defined in emowords, confirm there.
                vector.extend(emowords.getEWcnt(line, '1000'))
                vectors.append(vector)
                # One label per line; every line in file f shares f's label.
                labels.append(test_label_map[f])
                pbar.update(curCount)
                curCount += 1
    pbar.finish()

    m, p_acc, p_vals = predict(labels, vectors, m1)
    return [m, p_acc, p_vals]
def Testing(directory, m1, feature_space):
    """Vectorize every line of every file in *directory*, evaluate model *m1*,
    and print a 5x5 confusion matrix.

    Lines are vectorized against *feature_space* plus emotion-word counts;
    labels come from the module-level ``test_label_map`` keyed by filename.

    Returns ``[m, p_acc, p_vals]`` as produced by libsvm's ``predict``.
    """
    vectors = []
    labels = []

    # First pass: total line count, used only to size the progress bar.
    count = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            count += len(of.readlines())

    widgets = ['Test_Vectorize: ', Percentage(), ' ',
               Bar(marker='=', left='[', right=']'), ' ',
               ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()

    # Second pass: build one vector + one label per line.
    curCount = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            for line in of:
                vector = vectorize(feature_space, line)
                # '0001' selects a particular emotion-word counting mode
                # -- NOTE(review): semantics defined in emowords, confirm there.
                vector.extend(emowords.getEWcnt(line, '0001'))
                vectors.append(vector)
                labels.append(test_label_map[f])
                pbar.update(curCount)
                curCount += 1
    pbar.finish()

    m, p_acc, p_vals = predict(labels, vectors, m1)

    # Confusion matrix over the 5 classes: cm[true_label][predicted_label].
    cm = {}
    for i in range(0, 5):
        cm[i] = {}
        for j in range(0, 5):
            cm[i][j] = 0
    for i in range(0, len(m)):
        cm[labels[i]][m[i]] += 1
    print(cm)

    return [m, p_acc, p_vals]
def Testing(directory, m1, feature_space):
    """Evaluate model *m1* on every line of every file under *directory*.

    Duplicate of the preceding ``Testing`` definition (this later ``def``
    shadows the earlier ones at import time). Builds per-line feature
    vectors, predicts with libsvm, and prints a 5x5 confusion matrix.

    Returns ``[m, p_acc, p_vals]`` as produced by ``predict``.
    """
    vectors = []
    labels = []

    # Count total lines so the progress bar has an accurate maxval.
    count = 0
    for fname in os.listdir(directory):
        with open(directory + '/' + fname, 'r') as handle:
            count += len(handle.readlines())

    widgets = ['Test_Vectorize: ', Percentage(), ' ',
               Bar(marker='=', left='[', right=']'), ' ',
               ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()

    done = 0
    for fname in os.listdir(directory):
        with open(directory + '/' + fname, 'r') as handle:
            for line in handle:
                vector = vectorize(feature_space, line)
                # '0001' emotion-word mode -- NOTE(review): confirm in emowords.
                vector.extend(emowords.getEWcnt(line, '0001'))
                vectors.append(vector)
                # All lines of a file carry that file's label.
                labels.append(test_label_map[fname])
                pbar.update(done)
                done += 1
    pbar.finish()

    m, p_acc, p_vals = predict(labels, vectors, m1)

    # cm[true][predicted] over the 5 classes.
    cm = {}
    for i in range(0, 5):
        cm[i] = {}
        for j in range(0, 5):
            cm[i][j] = 0
    for i in range(0, len(m)):
        cm[labels[i]][m[i]] += 1
    print(cm)

    return [m, p_acc, p_vals]
def Training(directory, bi):
    """Train a libsvm C-SVC model on every line of every file in *directory*.

    The feature space is selected by information gain from *bi*; each line
    is vectorized against it plus emotion-word counts, labelled via the
    module-level ``label_map`` keyed by filename, and the trained model is
    saved to ``libsvm_SVC_Mix.model``.

    Returns ``[m1, feature_space]``: the trained model and the feature space
    needed to vectorize test data consistently.

    libsvm option reference (abridged):
      -s svm_type : 0=C-SVC, 1=nu-SVC, 2=one-class, 3=epsilon-SVR, 4=nu-SVR
      -t kernel   : 0=linear, 1=polynomial, 2=RBF (default), 3=sigmoid
      -d degree, -g gamma (default 1/num_features), -r coef0
      -c cost, -n nu, -p epsilon (SVR loss), -m cachesize (MB)
      -e tolerance, -h shrinking (0/1), -b probability_estimates (0/1)
      -wi weight : per-class C multiplier for C-SVC
    """
    vectors = []
    labels = []
    # NOTE(review): `sentences` is unused below (feature space comes from
    # `bi`, not `sentences`) -- call kept in case it has side effects; verify.
    sentences = load_collection_sentence(directory)
    # Feature selection by information gain over `bi`.
    feature_space = create_feature_space_IG(bi)

    # First pass: total line count, used only to size the progress bar.
    count = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            count += len(of.readlines())

    widgets = ['Train_Vectorize: ', Percentage(), ' ',
               Bar(marker='=', left='[', right=']'), ' ',
               ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()

    # Second pass: build one vector + one label per line.
    curCount = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            for line in of:
                vector = vectorize(feature_space, line)
                # '0001' emotion-word mode -- NOTE(review): confirm in emowords.
                vector.extend(emowords.getEWcnt(line, '0001'))
                vectors.append(vector)
                labels.append(label_map[f])
                pbar.update(curCount)
                curCount += 1
    pbar.finish()

    prob = problem(labels, vectors)
    param = parameter('-s 0')  # C-SVC with all other options at defaults
    m1 = train(prob, param)
    save_model('libsvm_SVC_Mix.model', m1)
    return [m1, feature_space]
def Training(directory):
    """Train a libsvm C-SVC model on every line of every file in *directory*.

    Variant of ``Training`` that derives the feature space directly from the
    collection's sentences (no information-gain selection) and does not save
    the model to disk. This later ``def`` shadows the earlier
    ``Training(directory, bi)`` at import time.

    Returns ``[m1, feature_space]``: the trained model and the feature space
    needed to vectorize test data consistently.

    libsvm option reference (abridged):
      -s svm_type : 0=C-SVC, 1=nu-SVC, 2=one-class, 3=epsilon-SVR, 4=nu-SVR
      -t kernel   : 0=linear, 1=polynomial, 2=RBF (default), 3=sigmoid
      -d degree, -g gamma (default 1/num_features), -r coef0
      -c cost, -n nu, -p epsilon (SVR loss), -m cachesize (MB)
      -e tolerance, -h shrinking (0/1), -b probability_estimates (0/1)
      -wi weight : per-class C multiplier for C-SVC
    """
    vectors = []
    labels = []
    sentences = load_collection_sentence(directory)
    feature_space = create_feature_space(sentences)
    print(len(feature_space))

    # First pass: total line count, used only to size the progress bar.
    count = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            count += len(of.readlines())

    widgets = ['Train_Vectorize: ', Percentage(), ' ',
               Bar(marker='0', left='[', right=']'), ' ',
               ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()

    # Second pass: build one vector + one label per line.
    curCount = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            for line in of:
                vector = vectorize(feature_space, line)
                # '0001' emotion-word mode -- NOTE(review): confirm in emowords.
                vector.extend(emowords.getEWcnt(line, '0001'))
                vectors.append(vector)
                labels.append(label_map[f])
                pbar.update(curCount)
                curCount += 1
    pbar.finish()

    prob = problem(labels, vectors)
    param = parameter('-s 0')  # C-SVC with all other options at defaults
    m1 = train(prob, param)
    # Model is intentionally not persisted in this variant.
    return [m1, feature_space]