def Testing(directory, m1, feature_space):
    """Vectorize every line of every file in *directory* and evaluate model *m1*.

    Each line is turned into a feature vector (bag-of-features from
    *feature_space* plus emotion-word counts), labelled via the module-level
    ``test_label_map`` keyed by filename, and fed to libsvm's ``predict``.

    Returns ``[m, p_acc, p_vals]`` exactly as produced by ``predict``:
    predicted labels, accuracy tuple, and decision/probability values.
    """
    vectors = []
    labels = []

    # First pass: total line count, used only to size the progress bar.
    count = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            count += len(of.readlines())

    widgets = ['Test_Vectorize: ', Percentage(), ' ',
               Bar(marker='0', left='[', right=']'), ' ',
               ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()

    # Second pass: build one vector + one label per line.
    curCount = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            for line in of:
                vector = vectorize(feature_space, line)
                # '1000' selects a particular emotion-word counting mode
                # -- NOTE(review): semantics defined in emowords, confirm there.
                vector.extend(emowords.getEWcnt(line, '1000'))
                vectors.append(vector)
                # One label per line; every line in file f shares f's label.
                labels.append(test_label_map[f])
                pbar.update(curCount)
                curCount += 1
    pbar.finish()

    m, p_acc, p_vals = predict(labels, vectors, m1)
    return [m, p_acc, p_vals]
def Testing(directory, m1, feature_space):
    """Vectorize every line of every file in *directory*, evaluate model *m1*,
    and print a 5x5 confusion matrix.

    Lines are vectorized against *feature_space* plus emotion-word counts;
    labels come from the module-level ``test_label_map`` keyed by filename.

    Returns ``[m, p_acc, p_vals]`` as produced by libsvm's ``predict``.
    """
    vectors = []
    labels = []

    # First pass: total line count, used only to size the progress bar.
    count = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            count += len(of.readlines())

    widgets = ['Test_Vectorize: ', Percentage(), ' ',
               Bar(marker='=', left='[', right=']'), ' ',
               ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()

    # Second pass: build one vector + one label per line.
    curCount = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            for line in of:
                vector = vectorize(feature_space, line)
                # '0001' selects a particular emotion-word counting mode
                # -- NOTE(review): semantics defined in emowords, confirm there.
                vector.extend(emowords.getEWcnt(line, '0001'))
                vectors.append(vector)
                labels.append(test_label_map[f])
                pbar.update(curCount)
                curCount += 1
    pbar.finish()

    m, p_acc, p_vals = predict(labels, vectors, m1)

    # Confusion matrix over the 5 classes: cm[true_label][predicted_label].
    cm = {}
    for i in range(0, 5):
        cm[i] = {}
        for j in range(0, 5):
            cm[i][j] = 0
    for i in range(0, len(m)):
        cm[labels[i]][m[i]] += 1
    print(cm)

    return [m, p_acc, p_vals]
def Testing(directory, m1, feature_space):
    """Evaluate model *m1* on every line of every file under *directory*.

    Duplicate of the preceding ``Testing`` definition (this later ``def``
    shadows the earlier ones at import time). Builds per-line feature
    vectors, predicts with libsvm, and prints a 5x5 confusion matrix.

    Returns ``[m, p_acc, p_vals]`` as produced by ``predict``.
    """
    vectors = []
    labels = []

    # Count total lines so the progress bar has an accurate maxval.
    count = 0
    for fname in os.listdir(directory):
        with open(directory + '/' + fname, 'r') as handle:
            count += len(handle.readlines())

    widgets = ['Test_Vectorize: ', Percentage(), ' ',
               Bar(marker='=', left='[', right=']'), ' ',
               ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()

    done = 0
    for fname in os.listdir(directory):
        with open(directory + '/' + fname, 'r') as handle:
            for line in handle:
                vector = vectorize(feature_space, line)
                # '0001' emotion-word mode -- NOTE(review): confirm in emowords.
                vector.extend(emowords.getEWcnt(line, '0001'))
                vectors.append(vector)
                # All lines of a file carry that file's label.
                labels.append(test_label_map[fname])
                pbar.update(done)
                done += 1
    pbar.finish()

    m, p_acc, p_vals = predict(labels, vectors, m1)

    # cm[true][predicted] over the 5 classes.
    cm = {}
    for i in range(0, 5):
        cm[i] = {}
        for j in range(0, 5):
            cm[i][j] = 0
    for i in range(0, len(m)):
        cm[labels[i]][m[i]] += 1
    print(cm)

    return [m, p_acc, p_vals]
def Training(directory, bi):
    """Train a libsvm C-SVC model on every line of every file in *directory*.

    The feature space is selected by information gain from *bi*; each line
    is vectorized against it plus emotion-word counts, labelled via the
    module-level ``label_map`` keyed by filename, and the trained model is
    saved to ``libsvm_SVC_Mix.model``.

    Returns ``[m1, feature_space]``: the trained model and the feature space
    needed to vectorize test data consistently.

    libsvm option reference (abridged):
      -s svm_type : 0=C-SVC, 1=nu-SVC, 2=one-class, 3=epsilon-SVR, 4=nu-SVR
      -t kernel   : 0=linear, 1=polynomial, 2=RBF (default), 3=sigmoid
      -d degree, -g gamma (default 1/num_features), -r coef0
      -c cost, -n nu, -p epsilon (SVR loss), -m cachesize (MB)
      -e tolerance, -h shrinking (0/1), -b probability_estimates (0/1)
      -wi weight : per-class C multiplier for C-SVC
    """
    vectors = []
    labels = []
    # NOTE(review): `sentences` is unused below (feature space comes from
    # `bi`, not `sentences`) -- call kept in case it has side effects; verify.
    sentences = load_collection_sentence(directory)
    # Feature selection by information gain over `bi`.
    feature_space = create_feature_space_IG(bi)

    # First pass: total line count, used only to size the progress bar.
    count = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            count += len(of.readlines())

    widgets = ['Train_Vectorize: ', Percentage(), ' ',
               Bar(marker='=', left='[', right=']'), ' ',
               ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()

    # Second pass: build one vector + one label per line.
    curCount = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            for line in of:
                vector = vectorize(feature_space, line)
                # '0001' emotion-word mode -- NOTE(review): confirm in emowords.
                vector.extend(emowords.getEWcnt(line, '0001'))
                vectors.append(vector)
                labels.append(label_map[f])
                pbar.update(curCount)
                curCount += 1
    pbar.finish()

    prob = problem(labels, vectors)
    param = parameter('-s 0')  # C-SVC with all other options at defaults
    m1 = train(prob, param)
    save_model('libsvm_SVC_Mix.model', m1)
    return [m1, feature_space]
def Training(directory):
    """Train a libsvm C-SVC model on every line of every file in *directory*.

    Variant of ``Training`` that derives the feature space directly from the
    collection's sentences (no information-gain selection) and does not save
    the model to disk. This later ``def`` shadows the earlier
    ``Training(directory, bi)`` at import time.

    Returns ``[m1, feature_space]``: the trained model and the feature space
    needed to vectorize test data consistently.

    libsvm option reference (abridged):
      -s svm_type : 0=C-SVC, 1=nu-SVC, 2=one-class, 3=epsilon-SVR, 4=nu-SVR
      -t kernel   : 0=linear, 1=polynomial, 2=RBF (default), 3=sigmoid
      -d degree, -g gamma (default 1/num_features), -r coef0
      -c cost, -n nu, -p epsilon (SVR loss), -m cachesize (MB)
      -e tolerance, -h shrinking (0/1), -b probability_estimates (0/1)
      -wi weight : per-class C multiplier for C-SVC
    """
    vectors = []
    labels = []
    sentences = load_collection_sentence(directory)
    feature_space = create_feature_space(sentences)
    print(len(feature_space))

    # First pass: total line count, used only to size the progress bar.
    count = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            count += len(of.readlines())

    widgets = ['Train_Vectorize: ', Percentage(), ' ',
               Bar(marker='0', left='[', right=']'), ' ',
               ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()

    # Second pass: build one vector + one label per line.
    curCount = 0
    for f in os.listdir(directory):
        with open(directory + '/' + f, 'r') as of:
            for line in of:
                vector = vectorize(feature_space, line)
                # '0001' emotion-word mode -- NOTE(review): confirm in emowords.
                vector.extend(emowords.getEWcnt(line, '0001'))
                vectors.append(vector)
                labels.append(label_map[f])
                pbar.update(curCount)
                curCount += 1
    pbar.finish()

    prob = problem(labels, vectors)
    param = parameter('-s 0')  # C-SVC with all other options at defaults
    m1 = train(prob, param)
    # Model is intentionally not persisted in this variant.
    return [m1, feature_space]