def main():
    """Print class sizes, resample class 2 and build five training sets.

    Steps, in order:
      1. Load the per-class lists with ``createDomain()`` and print the
         length of ``domain[0]`` .. ``domain[6]``.
      2. Replace ``domain[2]`` with 360 entries drawn at random, with
         replacement, from its own contents.
      3. Collect the first 72 entries of every class into ``tests``.
      4. Call ``getTrains(domain)`` five times and collect the results.

    NOTE(review): ``tests`` and ``trainList`` are built but neither returned
    nor handed to anything, so the function returns ``None``. This looks
    unfinished - confirm what was meant to consume them.
    """
    num_classes = 7
    domain = createDomain()
    for k in range(num_classes):
        print('len(e%d):%d' % (k, len(domain[k])))

    # Re-sampling: class 2 becomes 360 random picks from its own entries.
    domain[2] = [random.choice(domain[2]) for _ in range(360)]

    # Taken after the re-sampling above, so the class-2 share of the test
    # set is the first 72 entries of the re-sampled list.
    tests = [doc for k in range(num_classes) for doc in domain[k][:72]]

    # List of training-sample sets, one per getTrains() call.
    trainList = [getTrains(domain) for _ in range(5)]
def run(seed):
    """Train and score the maxent classifier on an oversampled split.

    The first 72 entries of each of the seven classes form the test set.
    Classes 0-5 are then padded to 2204 entries by appending random copies
    of their own non-test entries, and everything from index 72 onwards in
    every class becomes the training set.

    Args:
        seed: passed through unchanged to ``maxent.createPRF``.

    Returns:
        The ``(acc, G_mean)`` pair returned by ``maxent.createPRF``.
    """
    num_classes = 7
    holdout = 72  # entries per class reserved for testing

    domain = createDomain()
    for k in range(num_classes):
        print('len(e%d):%d' % (k, len(domain[k])))

    # Test samples: the fixed first `holdout` entries of every class, taken
    # before any oversampling so no duplicated entry can land in the test set.
    tests = [doc for k in range(num_classes) for doc in domain[k][:holdout]]

    # Re-sampling: grow each class to 2204 entries.
    # NOTE(review): range(6) leaves domain[6] untouched - presumably it is
    # already the largest class; confirm this is intentional.
    for j in range(6):
        for _ in range(2204 - len(domain[j])):
            # The pool is re-sliced on every draw, so copies appended earlier
            # can be drawn again. Hoisting the slice would change the draws.
            domain[j].append(random.choice(domain[j][holdout:]))

    # Training samples: everything after the held-out prefix of each class.
    trains = [doc for k in range(num_classes) for doc in domain[k][holdout:]]

    print('len(trains):' + str(len(trains)))
    print('len(tests):' + str(len(tests)))

    maxent.me_classify(trains, tests)
    # Presumably 'result.txt' is what me_classify wrote - verify in maxent.
    acc, G_mean = maxent.createPRF('result.txt', seed)
    return acc, G_mean
Пример #3
0
#! /usr/bin/env python
#coding=utf-8
from __future__ import division
from document import createDomain
from randomclassify import randomClassify

# Split the first two classes of the 'kitchen' domain: the first 200 entries
# of each class are held out for testing, the remainder is used for training.
domain = createDomain('kitchen')
tests = domain[0][:200] + domain[1][:200]
trains = domain[0][200:] + domain[1][200:]
randomClassify(trains, tests)
def main(seed):
	"""Combine five classifier runs by summing their class probabilities.

	The first 72 entries of each of the seven classes form the test set.
	Class 2 is padded with random copies of its own non-test entries until
	it has 360 of them. ``getTrains(domain)`` then supplies a training set
	for each of five ``run(...)`` calls; the per-class probabilities of the
	five runs are added up per test item and the class with the largest
	total becomes the combined prediction.

	Args:
		seed: passed through unchanged to ``run`` and ``maxent.createPRF``.

	Returns:
		The ``(acc, g_mean)`` pair returned by ``maxent.createPRF``.
	"""
	num_classes = 7
	holdout = 72  # entries per class reserved for testing
	co_num = 5    # number of runs whose probabilities are combined

	domain = createDomain()
	for k in range(num_classes):
		print('len(e%d):%d' % (k, len(domain[k])))

	tests = [doc for k in range(num_classes) for doc in domain[k][:holdout]]

	# Re-sampling: pad class 2 until it has 360 entries after the held-out
	# prefix. The pool is re-sliced on every draw, so copies appended earlier
	# can be drawn again.
	for _ in range(360 - len(domain[2][holdout:])):
		domain[2].append(random.choice(domain[2][holdout:]))

	pred_prob_list = []
	for i in range(co_num):
		trains = getTrains(domain)
		# real_label is overwritten on every pass; only the value from the
		# last run is used below.
		pred_prob, real_label = run(trains, tests, 'result_%d.txt' % (i + 1), seed)
		pred_prob_list.append(pred_prob)
	# Single-argument print(...) gives the same output on Python 2 and 3.
	print(len(pred_prob_list))

	# co_pred_prob[i][k]: probability of class k for test item i, summed
	# over all runs.
	co_pred_prob = [
		[sum(probs[i][k] for probs in pred_prob_list) for k in range(num_classes)]
		for i in range(len(tests))
	]
	print(len(co_pred_prob))

	# Predicted label = index of the largest summed probability; on a tie
	# list.index returns the lowest index.
	co_max_prob_label = [totals.index(max(totals)) for totals in co_pred_prob]
	print(co_max_prob_label)

	acc, g_mean = maxent.createPRF(co_max_prob_label, real_label, seed, 'co_resort_maxent_prf.txt')
	# Formatted by hand so the line matches the old "print acc,g_mean"
	# output on both Python 2 and Python 3.
	print('%s %s' % (acc, g_mean))
	return acc, g_mean
Пример #5
0
#! python2
#! /usr/bin/env python
#coding=utf-8
from __future__ import division
from document import createDomain
from svmclassify import svm_classify

# Split the first two classes of the 'mine' domain: the first 50 entries of
# each class are held out for testing, the remainder is used for training.
domain = createDomain('mine')
tests = domain[0][:50] + domain[1][:50]
trains = domain[0][50:] + domain[1][50:]
svm_classify(trains, tests)