def classifierTest(dataSet,labels):
    # ratio=0.10
    m=dataSet.shape[0]
    neighbour=25
    # numOfTests=int(m*ratio)
    # dataSet,ranges,minVals=normalize(dataSet)
    trainData=loadData.loadTrainingData("u.data")
    # trainData=minor2.test()
    numOfErrors=0
    dictionary={}
    prev=-1
    testD=loadData.loadTrainingData("u1.test")
    for i in range(0,dataSet.shape[0]):
        user=dataSet[i,0]
        movie=dataSet[i,1]
        classifierResult=int(classify0(dataSet[i,:],trainData,neighbour))
        if i==0:
            prev=user
            dictionary[movie]=classifierResult
        else:
            if user!=prev:
                pred_rating.append(dictionary)
                dictionary={}
                dictionary[movie]=classifierResult
                prev=user
            else:
                dictionary[movie]=classifierResult
    if len(dictionary):  # flush the last user's predictions
        pred_rating.append(dictionary)
    i=0;ndcg=0
    prec=0;recall=0
    for l in pred_rating:
        # tuPlus=[]
        tuPlus=0;intersection=0
        prec_u=0;recall_u=0;dcg_u=0;idcg_u=0;ndcg_u=0
        temp=sorted(l.items(),key=operator.itemgetter(1),reverse=True)
        top_n=temp[:neighbour]
        for j in range(testD.shape[1]):
            if testD[i,j]==5:
                tuPlus+=1
                num=1
                for q in top_n:
                    idcg_u+=(1.0/math.log(num+1,2))
                    if q[0]==j+1:
                        dcg_u+=(1.0/math.log(num+1,2))
                        intersection+=1
                    num+=1
        if idcg_u:  # guard against users with no 5-rated items in the test set
            ndcg_u=dcg_u/idcg_u
        print intersection,neighbour,tuPlus
        prec_u=intersection*1.0/neighbour
        if tuPlus:
            recall_u=intersection*1.0/tuPlus
        prec+=prec_u
        recall+=recall_u
        ndcg+=ndcg_u
        i+=1
    prec=prec*1.0/i
    recall=recall*1.0/i
    ndcg=ndcg*1.0/i
    print prec,recall,ndcg
def test(): dataSet=loadData.loadTrainingData("u.data") occupation=occupationLoad() listOfKValues=[8,16,32,64] for x in listOfKValues: centroids,clusterAssignment=kMeans(dataSet,x) print "For Clusters= %d :-"%x testFile="u" avg=0.0 standardDeviation=0.0 numOfTimes=True for i in range(1,6): k1,k2=0,0 testData,testLabel=loadData.loadTestData(testFile+str(i)+".test") m = shape(dataSet)[0] totalError=0 index=0# for test Label no. predictions=[] for t in testData: user,movie=int(t[0])-1,int(t[1])-1 label=testLabel[index] clusterNum=clusterAssignment[user] # cluster number of the user to test userInCluster=[] for i in range(0,m): if clusterAssignment[i]==clusterNum: userInCluster.append(i) sumOfRatings=0 count=0 # number of users who've watched the movie count1=0 for i in userInCluster: if dataSet[i][movie]!=0:# if movie is watched sumOfRatings+=dataSet[i][movie] count+=1 if occupation[i][1]==occupation[user][1]: k1+=dataSet[i][movie] count1+=1 if count==0:# if there is no user in the cluster who've watched the movie ratingsPredicted=3 else: if count1!=0: temp1=around(sumOfRatings/count)# average of the raings.. round-off temp2=around(k1/count1) ratingsPredicted=min(temp2,temp1) else: ratingsPredicted=around(sumOfRatings/count) predictions.append(ratingsPredicted) totalError+=absolute(ratingsPredicted-label) index+=1 meanError=totalError/len(testData) if numOfTimes: print metrics.classification_report(testLabel,predictions) numOfTimes=False # print meanError avg+=meanError predictions=array(predictions) standardDeviationError+=std(predictions) print "Mean Error: ", float(avg)/5 print "Standard Deviation: ",float(standardDeviation)/5 print
def testKMeans(dataSet):
    for x in range(50,61,2):
        centroids,clusterAssignment=kMeans(dataSet,x)
        dataSet=loadData.loadTrainingData("u.data")
        testFile="u"
        avg=0.0
        standardDeviationError=0
        for i in range(1,6):
            testData,testLabel=loadData.loadTestData(testFile+str(i)+".test")
            m = shape(dataSet)[0]
            totalError=0
            index=0  # for test label no.
            predictions=[]
            for t in testData:
                user,movie=int(t[0])-1,int(t[1])-1
                label=testLabel[index]
                clusterNum=clusterAssignment[user]  # cluster number of the user to test
                userInCluster=[]
                for i in range(0,m):
                    if clusterAssignment[i]==clusterNum:
                        userInCluster.append(i)
                sumOfRatings=0
                count=0  # number of users who've watched the movie
                for i in userInCluster:
                    if dataSet[i][movie]!=0:  # if movie is watched
                        sumOfRatings+=dataSet[i][movie]
                        count+=1
                if count==0:  # if there is no user in the cluster who's watched the movie
                    ratingsPredicted=3
                else:
                    ratingsPredicted=around(sumOfRatings/count)  # average of the ratings, rounded off
                predictions.append(ratingsPredicted)
                totalError+=absolute(ratingsPredicted-label)
                index+=1
            meanError=totalError/len(testData)
            # print "Mean Absolute Error: "+str(meanError)
            # print
            avg+=meanError
            # print "Precision And Recall: "
            # print metrics.classification_report(testLabel,predictions)
            # print
            predictions=array(predictions)
            standardDeviationError+=std(predictions)
            # print "Standard Deviation: "+str(standardDeviationError)
            # meanActual=mean(array(testLabel))
            # standardDeviationActual=std(array(testLabel))
            # tValue=(meanActual-meanError)/( sqrt( (((standardDeviationActual)**2)/len(testLabel)) + (((standardDeviationError)**2)/len(predictions)) ) )
            # print "tValue: "+str(tValue)
            # break
        # break
        # print
        print "Mean Absolute Error: "+str(float(avg)/5)
        print
        print "Standard Deviation: "+str(float(standardDeviationError/5))
        print
        break
def testClassifier():
    k=25;m=943;n=1682
    trainData=loadData.loadTrainingData("u.data")
    testData=loadData.loadTrainingData("u1.test")
    trainClassifier(trainData, k)
    ndcg=0;dcg=0;idcg=0;prec=0;recall=0
    for i in range(0,testData.shape[0]):
        # print "inside i loop"
        ndcg_u=0;dcg_u=0;idcg_u=0;tuPlus=0;intersection=0;prec_u=0;recall_u=0
        for j in range(0,testData.shape[1]):
            if testData[i,j]==5:
                # print "inside if"
                num=1
                tuPlus=tuPlus+1
                for n in range (0,k):
                    idcg_u+=(1.0/math.log(num+1,2))
                # print j in topn[i]
                # only as many recommendations survived the test-set filter as top_nrecommendtest[i] holds
                for a in range(0,len(top_nrecommendtest[i])):
                    # if j in topn[i]
                    print j,top_nrecommendtest[i][a]
                    if j==top_nrecommendtest[i][a]:
                        print "inside if"
                        intersection+=1
                        dcg_u+=(1.0/math.log(num+1,2))
                    num+=1
        if idcg_u:  # guard against users with no 5-rated items in the test set
            ndcg_u=dcg_u/idcg_u
        prec_u=1.0*intersection/k
        if tuPlus:
            recall_u=1.0*intersection/tuPlus
        print prec_u,recall_u,ndcg_u
        prec+=prec_u
        recall+=recall_u
        ndcg+=ndcg_u
    ndcg=1.0*ndcg/m
    prec=1.0*prec/m
    recall=1.0*recall/m
    print prec,recall,ndcg
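
# --- Illustrative sketch, not part of the original code. ---
# The per-user metric loops above are hard to follow; the helper below shows an
# equivalent, self-contained way to compute precision, recall and NDCG@k for one
# user from a ranked recommendation list and the set of test items rated 5. It
# uses the standard binary-relevance NDCG definition, which differs slightly from
# the accumulation used above. The names (rank_metrics, ranked_items,
# relevant_items) are hypothetical, not identifiers from this code base.
def rank_metrics(ranked_items, relevant_items, k):
    # ranked_items: item ids ordered best-first; relevant_items: set of relevant item ids
    top_k = ranked_items[:k]
    hits = [item for item in top_k if item in relevant_items]
    dcg = sum(1.0 / math.log(pos + 2, 2)
              for pos, item in enumerate(top_k) if item in relevant_items)
    ideal_hits = min(len(relevant_items), k)
    idcg = sum(1.0 / math.log(pos + 2, 2) for pos in range(ideal_hits))
    precision = len(hits) * 1.0 / k
    recall = len(hits) * 1.0 / len(relevant_items) if relevant_items else 0.0
    ndcg = dcg / idcg if idcg else 0.0
    return precision, recall, ndcg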
def test(): dataSet=loadData.loadTrainingData("u.data") avg=0.0 standardDeviation=0.0 for x in range(1,6): testSet,testLabel=loadData.loadTestData("u"+str(x)+".test") # for i in range(shape(testSet)[0]): testLabel=testLabel[:100] index=0 totalError=0 mTest=0 predictions=[] for t in testSet: user,movie=int(t[0])-1,int(t[1])-1 label=testLabel[index] relation=pearson(dataSet,user) summation=0.0 answer=0.0 count=0 for j in range(0,shape(dataSet)[0]): if user!=j: if dataSet[j,movie]!=0: summation+=((dataSet[j,movie])*relation[0,j]) count+=1 if count==0: answer=3 else: answer=around(summation/(count)) # print answer predictions.append(answer) totalError+=absolute(answer-label) index+=1 mTest+=1 if mTest==100: break # stdDeviation= meanError=float(totalError)/mTest predictions=array(predictions) avg+=meanError standardDeviation+=std(predictions) print metrics.classification_report(testLabel,predictions) # print meanError print print "Mean Absolute Error: "+str(float(avg)/5) print print "Standard Deviation: "+str(float(standardDeviation/5)) print
def classifierTest(dataSet,labels):
    # ratio=0.10
    m=dataSet.shape[0]
    # numOfTests=int(m*ratio)
    # dataSet,ranges,minVals=normalize(dataSet)
    trainData=loadData.loadTrainingData("u.data")
    numOfErrors=0
    for i in range(0,dataSet.shape[0]):
        classifierResult=int(classify0(dataSet[i,:],trainData,25))
        # print classifierResult,labels[i]
        # print
        # print "the classifier came back with: %d, the real answer is: %d"% (classifierResult, labels[i])
        if classifierResult!=labels[i]:
            numOfErrors+=1
    errorRate=numOfErrors*1.0/dataSet.shape[0]
    print errorRate
def test(): dataSet=loadData.loadTrainingData("u.data") u,clusterAssignment=fcm(dataSet,2,8,2) # centroids,clusterAssignment=kMeans(dataSet,x)# 15 clusters print u # return testFile="u" avg=0.0 for i in range(1,6): testData,testLabel=loadData.loadTestData(testFile+str(i)+".test") m = shape(dataSet)[0] totalError=0 index=0# for test Label no. predictions=[] # clusterAssignedCode for t in testData: user,movie=int(t[0])-1,int(t[1])-1 label=testLabel[index] clusterNum=clusterAssignment[user] # cluster number of the user to test userInCluster=[] for i in range(0,m): if clusterAssignment[i]==clusterNum: userInCluster.append(i) sumOfRatings=0 count=0 # number of users who've watched the movie for i in userInCluster: if dataSet[i][movie]!=0:# if movie is watched sumOfRatings+=dataSet[i][movie] count+=1 if count==0:# if there is no user in the cluster who've watched the movie ratingsPredicted=3 else: ratingsPredicted=around(sumOfRatings/count)# average of the raings.. round-off predictions.append(ratingsPredicted) print ratingsPredicted totalError+=absolute(ratingsPredicted-label) index+=1 meanError=totalError/len(testData) # avg+=meanError # print metrics.classification_report(testLabel,predictions) # meanError=totalError/len(testData) print meanError
def test(): dataSet=loadData.loadTrainingData("u.data") for x in range(20,40,2): centroids,clusterAssignment=kMeans(dataSet[:100,:],x,shape(dataSet)[0])# 15 clusters emptyPool=[] for i in range(100,shape(dataSet)[0]): emptyPool.append(i) print centroids return testFile="u" avg=0.0 for i in range(1,6): testData,testLabel=loadData.loadTestData(testFile+str(i)+".test") m = shape(dataSet)[0] totalError=0 index=0# for test Label no. predictions=[] for t in testData: user,movie=int(t[0])-1,int(t[1])-1 label=testLabel[index] clusterNum=clusterAssignment[user] # cluster number of the user to test userInCluster=[] for i in range(0,m): if clusterAssignment[i]==clusterNum: userInCluster.append(i) sumOfRatings=0 count=0 # number of users who've watched the movie for i in userInCluster: if dataSet[i][movie]!=0:# if movie is watched sumOfRatings+=dataSet[i][movie] count+=1 if count==0:# if there is no user in the cluster who've watched the movie ratingsPredicted=3 else: ratingsPredicted=around(sumOfRatings/count)# average of the raings.. round-off predictions.append(ratingsPredicted) totalError+=absolute(ratingsPredicted-label) index+=1 meanError=totalError/len(testData) # print meanError avg+=meanError print print float(avg)/5
def testKMeansForPca(data): dataSet=loadData.loadTrainingData("u1.base") # centroids,clusterAssignment=kMeans(dataSet,15)# 15 clusters testData,testLabel=loadData.loadTestData("u1.test") clf=KMeans(n_clusters=15) clf.fit(data) clusterAssignment=clf.predict(data) m = shape(dataSet)[0] totalError=0 index=0# for test Label no. predictions=[] for t in testData: user,movie=int(t[0])-1,int(t[1])-1 label=testLabel[index] clusterNum=clusterAssignment[user] # cluster number of the user to test userInCluster=[] for i in range(0,m): if clusterAssignment[i]==clusterNum: userInCluster.append(i) sumOfRatings=0 count=0 # number of users who've watched the movie for i in userInCluster: if dataSet[i][movie]!=0:# if movie is watched sumOfRatings+=dataSet[i][movie] count+=1 if count==0:# if there is no user in the cluster who've watched the movie ratingsPredicted=3 else: ratingsPredicted=around(sumOfRatings/count)# average of the raings.. round-off predictions.append(ratingsPredicted) totalError+=absolute(ratingsPredicted-label) index+=1 meanError=totalError/len(testData) print meanError standardDeviation=std(predictions) print standardDeviation
def test(): dataSet=loadData.loadTrainingData("u.data")#mXn data=pca(dataSet,100) testKMeans(data)
def testPCA(): dataSet=loadData.loadTrainingData("u1.base")#mXn pca = PCA(n_components=100) # data=pca(dataSet,100) data=pca.fit_transform(dataSet) testKMeansForPca(data)
def test(): no_grids=0 trainData=loadData.loadTrainingData("u1.base") # testData,testLabels=loadData.loadTestData("u1.test") # SOM(trainData) m=shape(trainData)[0] # print testData # print testLabels # print m # centers=mat(zeros((numClusters,2)) clusters=initClusters() print clusters centers=mat(zeros((numClusters,2))) bmu_of=[0 for m in range(0,users)] radius=4 # print "In test" for i in range (0,radius): for j in range (0,radius): if no_grids<numClusters: centers[no_grids,0]=i centers[no_grids,1]=j # print centers[no_grids,0] # print centers[no_grids,1] # print no_grids+=1 else: break iterations=0 temp_difference=0 difference=[0 for m in range(numClusters)] # print shape(clusters) # for c in range(0,movies): # if clusters[0,c]==5: # print "usahiuhsauifhduisahfuiashfiuhsauifhuishfuisah" # print clusters[0,c] # print clusters[0,0] while radius>=1: for i in range (0,users): print i for j in range (0,numClusters): temp_difference=0 for k in range (0,movies): temp_difference+=(trainData[i,k]-clusters[j,k])*(trainData[i,k]-clusters[j,k]) difference[j]=sqrt(temp_difference) min_difference=10000001 for j in range (0,numClusters): if difference[j]<min_difference: min_difference=difference[j] bmu_of[i]=j currentLearningRate=update_learning_rate(learningRate, iterations, total_iterations) temp_rating=0 for j in range(0,numClusters): distance=math.sqrt((centers[bmu_of[i],0]-centers[j,0])*(centers[bmu_of[i],0]-centers[j,0])+(centers[bmu_of[i],1]-centers[j,1])*(centers[bmu_of[i],1]-centers[j,1])) if distance<radius: newInfluence=update_influence(centers, bmu_of[i], j, radius) netChange=0 for k in range (0,movies): # print "updating" # print j,k # print trainData[i,k] # print clusters[j,k] # print rating_difference=trainData[i,k]-clusters[j,k] netChange=(currentLearningRate*newInfluence*rating_difference) temp_rating=clusters[j,k] temp_rating+=netChange clusters[j,k]=temp_rating if clusters[j,k]>5: clusters[j,k]=5 if clusters[j,k]<1: clusters[j,k]=1 # break # break # break # break print radius iterations+=1 # temp_radius=update_radius(initRadius, time_constant, iterations) # radius=temp_radius radius-=1 # break # break error=0 count=0 fr=open("u1.test") lines=fr.readlines() # testMat=mat(zeros(943,1682)) for line in lines: word = line.split("\t") u=int(word[0])-1 m=int(word[1])-1 r=int(word[2]) # testMat[int(word[0])-1,int(word[1])-1]=int(word[2]) error+=abs(r-clusters[bmu_of[u],m]) count+=1 print error/count
def trainClassifier(trainData,k):
    m=trainData.shape[0]
    n=trainData.shape[1]
    norm_trainData=trainData/5.0  # normalise ratings (float divisor avoids integer division)
    # watched=[0]*943
    watched=zeros((943,1682))
    trans=norm_trainData.T
    model=mat(trans)*mat(norm_trainData)
    final_model=mat(zeros((1682,1682)))
    for i in range(0,m):
        for j in range(0,n):
            if trainData[i,j]==0:
                watched[i,j]=0
            else:
                watched[i,j]=1
    trans_watch=watched.T
    watch_together=mat(trans_watch)*mat(watched)
    # print final_model.shape[0],final_model.shape[1]
    # print model.shape[0],model.shape[1]
    # print watch_together.shape[0],watch_together.shape[1]
    # print model[0,0]
    for i in range (0,n):
        for j in range (0,n):
            if watch_together[i,j]!=0:
                final_model[i,j]=model[i,j]/watch_together[i,j]*1.0
            # else:
            #     final_model[i,j]=0.0
    for i in range(0,n):
        for j in range(0,n):
            if i==j:
                final_model[i,j]=0
    # print final_model
    u=zeros((943,1682))
    for i in range (0,m):
        for j in range (0,n):
            if trainData[i,j]==0:
                u[i,j]=1
            else:
                u[i,j]=0
    trans_u=u.T
    recommend=final_model*trans_u
    recommend=recommend.T
    for i in range (0,943):
        for j in range (0,1682):
            if u[i,j]==0:
                recommend[i,j]=0
    topn=zeros((943,1682),dtype=int)  # store movie indices as ints so they can be used for indexing
    sumi=0
    # print recommend[0]
    # print
    # for i in range (0,n):
    #     if recommend[0,i]!=0:
    #         print i
    #         sumi=sumi+1
    # print sumi
    for i in range (0,m):
        topn[i]=argsort(recommend[i])
    testData=loadData.loadTrainingData("u1.test")
    # print argmax(recommend[0])
    # print testData[0,argmax(recommend[0])]
    # for i in range (n-1,0,-1):
    #     print topn[0,i],testData[0,topn[0,i]]
    # print sumi
    # print k
    # print topn
    topn_recommendation=zeros((943,k),dtype=int)
    for i in range (0,m):
        topn_recommendation[i]=topn[i,n-k:]
    for i in range(0,topn_recommendation.shape[0]):
        temp=[]
        for j in range(0,len(topn_recommendation[i])):
            if(testData[i,topn_recommendation[i,j]]!=0):
                temp.append(topn_recommendation[i,j])
        top_nrecommendtest.append(temp)
    # testData=loadData.loadTrainingData("u1.test")
    # sum1=0
    # sum2=0
    # for i in range(0,trainData.shape[1]):
    #     if trainData[0,i]!=0:
    #         sum1=sum1+1
    # for i in range(0,testData.shape[1]):
    #     if testData[0,i]!=0:
    #         sum2=sum2+1
    # print sum1,sum2,sum1+sum2
    # print topn_recommendation
    # for i in range (0,k):
    #     print testData[0,topn_recommendation[0,i]]
    # print topn_recommendation
    return topn_recommendation
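
# --- Illustrative sketch, not part of the original code. ---
# The element-wise loops above that build the item-item model can be expressed
# with numpy array operations. The sketch below reproduces the same construction
# (co-rating dot products divided by co-watch counts, diagonal zeroed), assuming
# trainData is a dense users x movies ratings array with 0 meaning "not rated".
# The name build_item_model_sketch is hypothetical.
import numpy as np

def build_item_model_sketch(trainData):
    R = np.asarray(trainData, dtype=float) / 5.0      # normalised ratings
    watched = (R != 0).astype(float)                  # 1 where a rating exists
    model = R.T.dot(R)                                # movie x movie co-rating dot products
    watch_together = watched.T.dot(watched)           # movie x movie co-watch counts
    final_model = np.divide(model, watch_together,
                            out=np.zeros_like(model),
                            where=watch_together != 0)
    np.fill_diagonal(final_model, 0)                  # ignore self-similarity
    return final_model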
def test(): trainData=loadData.loadTrainingData("u.data") # trainClassifier(trainData, 25) testClassifier()
def test(): dataSet=loadData.loadTrainingData("u1.base")#mXn data=pca(dataSet,200) testKMeans(data)# mX200
def test(): dataSet=loadData.loadTrainingData("u.data") for clu in range(74,75,2): avg=0.0 standardDeviation=0.0 for te in range(1,6): testData,testLabel=loadData.loadTestData("u"+str(te)+".test") clusters,emptyPool,meanList=initialization(dataSet,clu) while len(emptyPool): randVar=random.randint(0,len(emptyPool)-1) user=emptyPool[randVar] randNes=random.randint(0,len(clusters)-1) mae=float(sum(abs(dataSet[user,:]-meanList[randNes])))/1682 mini=100000000 count=0 threshold=int(0.3*len(clusters[randNes])) minPerson=-1 for i in range(0,len(clusters[randNes])): mae=float(sum(abs(dataSet[clusters[randNes][i][0],:]-meanList[randNes])))/1682 if mae<mini: count+=1 mini=mae minPerson=clusters[randNes][i][0] if count: if count>=threshold: for c in clusters[randNes]: if c[0]==minPerson: q=clusters[randNes].index(c) for t in range(0,1682): add=(meanList[randNes][t]*len(meanList[randNes]))-dataSet[minPerson,t] add=add/(len(meanList[randNes])-1) meanList[randNes][t]=add del(clusters[randNes][q]) emptyPool.append(minPerson) clusters[randNes].append([user,dataSet[user,:]]) ind=emptyPool.index(user) for t in range(0,1682): add=(meanList[randNes][t]*len(meanList[randNes]))+dataSet[user,t] add=add/(len(meanList[randNes])+1) meanList[randNes][t]=add del(emptyPool[ind]) # var+=1 summation=0 for c in clusters: # print len(c) summation+=len(c) # print summation totalError=0 predictions=[] m = shape(dataSet)[0] index=0 for t in testData: user,movie=int(t[0])-1,int(t[1])-1 # print user,movie label=testLabel[index] check=False for i in range(0,len(clusters)): for j in range(0,len(clusters[i])): if clusters[i][j][0]==user: count =0 tum=0.0 for k in range(0,len(clusters[i])): if dataSet[clusters[i][k][0],movie]!=0: count+=1 tum+=dataSet[clusters[i][k][0],movie] if count!=0: tum=tum/count check=True if check: break if check: break predictions.append(tum) totalError+=absolute(tum-label) index+=1 meanError=totalError/len(testData) print "Precision And Recall: " print shape(testLabel) print metrics.classification_report(testLabel,predictions) # print return predictions=array(predictions) standardDeviation+=std(predictions) # print standardDeviation # print meanError avg+=meanError # break # break print "Standard Deviation: "+str(standardDeviation)
lines=f.readlines()
l=[]
for line in lines:
    l.append([float(v) for v in line.split("|")[5:]])
# print l
# print l[1]
for i in range(len(l)):
    count=0
    for j in range(len(l[i])):
        if l[i][j]==1:
            count+=1
    for j in range(len(l[i])):
        if l[i][j]==1:
            l[i][j]/=count
# print l[1]
trainData=loadData.loadTrainingData("u.data")
alpha=0.01
# thres=0.001
theta=mat(zeros((943,19)))
# while True:
print shape(mat(l))
# return
for i in range(10):
    for k in range(19):
        old=theta[i,k]
        thres=0.1
        sumi=0
        # T=2
        while True:
            for j in range(1682):
                if trainData[i,j]:
def test(): trainData = loadData.loadTrainingData("u.data") # create_pref_model(trainData) pref_matrix = create_pref_model(trainData) # print pref_matrix return pref_matrix
def test(): dataSet=loadData.loadTrainingData("u1.base") testSet,testLabel=loadData.loadTestData("u1.test") som(dataSet)