示例#1
0
	def __init__(self, filename, N):
		"""Build the frequency tables for an N-gram language model.

		The corpus is counted twice via ``countNGrams``: once for N-grams
		and once for (N-1)-grams. The results are stored on the instance
		as ``Nfreq`` and ``N1freq``, after which ``setSmoothing('no')`` is
		called.

		Args:
			filename (str): The name of the corpus to train on.
			N (int): The order of the N-grams; expected to satisfy N > 1.

		Note:
			When ``N < 2`` the constructor returns immediately, so none of
			``N``, ``Nfreq`` or ``N1freq`` is set on the instance.

		"""
		if N < 2:
			# Unsupported order: leave the instance without any attributes.
			return

		self.N = N
		# Counts of the full N-grams found in the corpus.
		self.Nfreq = countNGrams(filename, N)
		# Counts of the (N-1)-grams found in the same corpus.
		self.N1freq = countNGrams(filename, N - 1)
		self.setSmoothing('no')
示例#2
0
def main():
	"""Run question1 through question4 using the command-line arguments.

	The arguments in ``sys.argv[1:]`` are handed to ``parseArgs``. From the
	result this reads the corpus name, the two probability file names, the
	scored-permutations value and N. N-gram and (N-1)-gram counts for the
	corpus are turned into N-gram probabilities and then sentence
	probabilities, which are passed on to the question functions.

	"""
	parsed = parseArgs(sys.argv[1:])
	corpus = parsed["corpus"][0]
	CPF = parsed["conditional_prob_file"][0]
	SPF = parsed["sequence_prob_file"][0]
	SP = parsed["scored_permutations"]
	N = parsed["n"]
	question1(corpus)
	
	# Counting N- and (N-1)-Grams
	N_freq = countNGrams(corpus, N)
	N_min_1_freq = countNGrams(corpus, N-1)
	
	# Calculating probabilities
	nGramProb = NGramProbabilities(N_freq, N_min_1_freq)
	question2(nGramProb, CPF, N)
	# Pass the variable bound above; the previous spelling `NGramProb`
	# was never assigned and raised NameError.
	sentenceProb = sentenceProbabilities(nGramProb, N)
	question3(sentenceProb, SPF, N)
	# Likewise `sentenceProbs` was unbound; use `sentenceProb`.
	question4(sentenceProb, SP)
示例#3
0
def question1(filename, N=2, M=10):
	"""Print the M highest-frequency N-grams of a corpus.

	Args:
		filename (str): The name of the corpus to extract N-grams from.
		N (int): (optional) The order of the N-grams; defaults to bigrams.
		M (int): (optional) How many of the top N-grams to print;
			defaults to 10.

	Note:
		The header row is always ("Bigram", "Frequency"), whatever the
		value of N.

	"""
	print("Question 1")
	counts = countNGrams(filename, N)
	# Ascending stable sort followed by a reversal; this keeps the same
	# ordering among equal frequencies as sorting and then reversing in place.
	ranked = sorted(counts.items(), key=itemgetter(1))[::-1]
	table = [("Bigram", "Frequency")] + ranked
	# One extra row so that the header does not count towards M.
	prettyPrint(table, M + 1)