Python FeatureExtractor.idf примеры использования

Язык программирования: Python

Класс/Тип: FeatureExtractor

Метод/Функция: idf

Примеров на hotexamples.com: 2

Python FeatureExtractor.idf - 2 примера найдено. Это лучшие примеры Python кода для FeatureExtractor.idf из пакета audio, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

FeatureExtractor(26)

augmented_frequency(2)

set_class(2)

frequency(2)

get_featurelist_from_nparray(2)

idf(2)

ExtractSIFTForGrid(1)

saveFilenames(1)

set_frameslength(1)

saveWordList(1)

get_velocity(1)

saveAllVectors(1)

processAllDocs(1)

Extract_All_Features(1)

authorFinder(1)

__featureextractor__(1)

similarityFinder(1)

Пример #1

Показать файл

Файл: NaiveBayes.py Проект: vairylein/LUSmidterm3

def test(doc, name):
    """Classify each line of ``doc[2]`` and write ``ID<TAB>label`` rows to ``name``.

    A per-word score table is built from ``doc[:2]`` by chaining
    ``FeatureExtractor.frequency``, ``augmented_frequency`` and ``idf``.
    Every line of ``doc[2]`` is split on tabs: column 0 is written back as
    the identifier and column 4 is the space-separated sentence (``<s>`` and
    ``</s>`` markers are removed). For each class the log of the word scores
    is summed, the log prior is added, and the label with the highest total
    is written.

    Args:
        doc: Sequence whose first two items are handed to
            ``FeatureExtractor.frequency`` and whose third item is the path
            of the file to classify.
        name: Path of the output file; it is overwritten.
    """
    # NOTE(review): the two True flags presumably enable the add-one
    # smoothing mentioned in the original comment -- confirm against
    # FeatureExtractor.frequency.
    raw_counts = FeatureExtractor.frequency(doc[:2], True, True)
    # augmented frequencies taking into account document size
    augmented = FeatureExtractor.augmented_frequency(raw_counts)
    frequencies = FeatureExtractor.idf(augmented)  # idfs

    labels = ["C2", "IKEA_EN", "IKEA_IT"]
    priors = [0.33, 0.33, 0.329]  # based on number of documents
    class_indices = range(len(labels))
    unseen_log = math.log(0.5)  # fixed score for words missing from the table

    # Both handles are managed by ``with`` so they are closed even when a
    # malformed line or a non-positive score raises part-way through.
    with open(name, "w") as out, open(doc[2], "r") as mefile:
        for line in mefile:
            columns = line.split('\t')
            sample_id = columns[0]
            # NOTE(review): the line is not stripped, so if column 4 is the
            # last column its final token keeps the trailing newline --
            # confirm this matches how the training vocabulary was built.
            words = columns[4].replace("<s>", "").replace("</s>", "").split(" ")

            scores = [0] * len(labels)
            for word in words:
                if word in frequencies:
                    per_class = frequencies[word]
                    for i in class_indices:
                        scores[i] += math.log(per_class[i])
                else:
                    for i in class_indices:
                        scores[i] += unseen_log

            posteriors = [scores[i] + math.log(priors[i]) for i in class_indices]
            # index(max(...)) keeps the first label on ties
            proposal = labels[posteriors.index(max(posteriors))]
            out.write(sample_id + "\t" + proposal + "\n")

Пример #2

Показать файл

Файл: NaiveBayes.py Проект: vairylein/LUSmidterm3

def validate(doc, name):
    """Classify the labelled lines of ``doc[2]`` and write predictions and metrics to ``name``.

    A per-word score table is built from ``doc[:2]`` by chaining
    ``FeatureExtractor.frequency``, ``augmented_frequency`` and ``idf``.
    Every line of ``doc[2]`` is split on tabs: column 1 is read as the gold
    label and column 4 as the space-separated sentence (``<s>`` and ``</s>``
    markers are removed). For each class the log of the word scores is
    summed, the log prior is added, and the best label is written as
    ``proposal<TAB>gold``. After all lines, the number of correct
    predictions and the macro-averaged precision, recall and F1 are appended
    to the same file. Each proposal, and the final false-negative count for
    ``C2``, are also printed to stdout.

    Ratios with an empty denominator (a class that was never predicted or
    never occurred, or precision + recall equal to zero) are reported as 0.0
    instead of raising ``ZeroDivisionError``.

    Args:
        doc: Sequence whose first two items are handed to
            ``FeatureExtractor.frequency`` and whose third item is the path
            of the labelled file to evaluate.
        name: Path of the report file; it is overwritten.
    """

    def ratio(numerator, denominator):
        # float() forces true division under Python 2, where int / int
        # truncates and would collapse every precision/recall to 0 or 1.
        if not denominator:
            return 0.0
        return float(numerator) / denominator

    # NOTE(review): the original comment calls this "frequency count
    # smoothed by 1" although no flags are passed here -- confirm the
    # defaults of FeatureExtractor.frequency.
    raw_counts = FeatureExtractor.frequency(doc[:2])
    # augmented frequencies taking into account document size
    augmented = FeatureExtractor.augmented_frequency(raw_counts)
    frequencies = FeatureExtractor.idf(augmented)  # idfs

    labels = ["C2", "IKEA_EN", "IKEA_IT"]
    priors = [0.33, 0.33, 0.328]  # based on number of documents
    class_indices = range(len(labels))
    unseen_log = math.log(0.5)  # fixed score for words missing from the table

    # Per-label confusion counts.
    true_pos = dict.fromkeys(labels, 0)
    false_pos = dict.fromkeys(labels, 0)
    false_neg = dict.fromkeys(labels, 0)
    correct = 0
    number = 0

    # The report handle is managed by ``with`` so it is closed even when a
    # malformed line or a non-positive score raises part-way through.
    with open(name, "w") as report:
        with open(doc[2], "r") as mefile:
            for line in mefile:
                columns = line.split('\t')
                gold = columns[1]
                words = columns[4].replace("<s>", "").replace("</s>", "").split(" ")

                scores = [0] * len(labels)
                for word in words:
                    if word in frequencies:
                        per_class = frequencies[word]
                        for i in class_indices:
                            scores[i] += math.log(per_class[i])
                    else:
                        for i in class_indices:
                            scores[i] += unseen_log

                posteriors = [scores[i] + math.log(priors[i]) for i in class_indices]
                # index(max(...)) keeps the first label on ties
                proposal = labels[posteriors.index(max(posteriors))]
                report.write(proposal + "\t" + gold + "\n")

                # Single-argument call form prints the same under Python 2 and 3.
                print(proposal)

                # count true positives, false positives, false negatives
                if gold == proposal:
                    true_pos[gold] += 1
                    correct += 1
                else:
                    # A gold label outside the known classes is not counted
                    # as a false negative for any class.
                    if gold in false_neg:
                        false_neg[gold] += 1
                    false_pos[proposal] += 1

                number += 1

        print(false_neg["C2"])

        precisions = [ratio(true_pos[c], true_pos[c] + false_pos[c]) for c in labels]
        recalls = [ratio(true_pos[c], true_pos[c] + false_neg[c]) for c in labels]

        # Macro averages over the three classes.
        avgpre = sum(precisions) / len(labels)
        avgrec = sum(recalls) / len(labels)
        f1 = 2 * ratio(avgpre * avgrec, avgpre + avgrec)

        report.write("\n\ncorrect: " + str(correct) + "out of" + str(number))
        report.write("\nprecision: " + str(avgpre))
        report.write("\nrecall: " + str(avgrec))
        report.write("\nF1: " + str(f1))