예제 #1
0
def textutils_normalize_line():
	"""Compare TextUtils.normalize_line output with the stored expectation.

	The expected-output file is flattened into one list of
	whitespace-separated tokens. Every line of the input file is run
	through TextUtils.normalize_line and the results are concatenated into
	a second flat list. The two lists must be equal.
	"""
	# rstrip() removes the trailing newline before the line is split
	reference_lines = Utilities.open_file(TST_DIR + TXT_NORMAL_FILE_EXP)
	expected_tokens = [
		token
		for raw in reference_lines
		for token in raw.rstrip().split()
	]

	# The per-line results are concatenated, so this is a flat list rather
	# than one entry per input line.
	input_lines = Utilities.open_file(TST_DIR + TXT_NORMAL_FILE)
	actual_items = [
		item
		for raw in input_lines
		for item in TextUtils.normalize_line(raw)
	]
	assert actual_items == expected_tokens
예제 #2
0
	def file_to_ngram(file_path, n):
		"""Build n-grams from the normalized contents of a file.

		Args:
			file_path: Path handed to Utilities.is_file and Utilities.open_file.
			n: Size of each n-gram; must be at least 1.

		Returns:
			None when n is below 1 or when Utilities.is_file rejects the
			path; otherwise whatever ngrams(tokens, n) returns.
		"""
		if n < 1:
			return None
		if not Utilities.is_file(file_path):
			return None

		tokens = []
		for line in Utilities.open_file(file_path):
			tokens.extend(TextUtils.normalize_line(line))
			# A single-space element is appended after every line's output,
			# so line boundaries stay visible in the token stream.
			tokens.append(' ')

		return ngrams(tokens, n)
예제 #3
0
    def file_to_ngram(file_path, n):
        """Build n-grams from the normalized contents of a file.

        Args:
            file_path: Path handed to Utilities.is_file and Utilities.open_file.
            n: Size of each n-gram; must be at least 1.

        Returns:
            None when n is below 1 or when Utilities.is_file rejects the
            path; otherwise whatever ngrams(tokens, n) returns.
        """
        if n < 1:
            return None
        if not Utilities.is_file(file_path):
            return None

        tokens = []
        for line in Utilities.open_file(file_path):
            tokens.extend(TextUtils.normalize_line(line))
            # A single-space element is appended after every line's output,
            # so line boundaries stay visible in the token stream.
            tokens.append(' ')

        return ngrams(tokens, n)