def task1():
    """Task 1: collect the distinct four-letter words of the first document.

    Passes ``t[0]`` through ``fileMK.stemText`` (presumably a stemmer --
    confirm against ``fileMK``), extracts every run of exactly four ASCII
    letters that is delimited on both sides by a space, a parenthesis or an
    angle bracket, and writes the sorted list of distinct matches to
    ``outputPath + "task1"``.  Nothing is written when no word matches.

    NOTE(review): a second ``task1`` is defined later in this file; if both
    live in the same module, the later definition shadows this one.
    NOTE(review): the output name has no ``.txt`` suffix, unlike the other
    tasks' output files -- left unchanged, confirm whether that is intended.
    """
    stemmed = fileMK.stemText(t[0])

    # The closing delimiter is a lookahead so that it is NOT consumed.
    # re.findall only returns non-overlapping matches: when the delimiter was
    # part of the match, the second of two four-letter words separated by a
    # single space (" that this ") had no leading delimiter left and was
    # silently skipped.  A raw string also avoids the invalid "\(" escapes.
    four_letter_words = re.findall(
        r"[ ()<>]([a-zA-Z]{4})(?=[ ()<>])", stemmed
    )
    if not four_letter_words:
        return

    word_counts = fileMK.unsortedDictFromWords(four_letter_words)
    target = outputPath + "task1"
    fileMK.writeTextToFile(str(sorted(word_counts.keys())), target)
def task2():
    """Task 2: report how many different terms occur in the corpus.

    Every entry of ``t`` is split on single spaces, the resulting tokens are
    handed to ``fileMK.unsortedDictFromWords``, and the number of entries in
    the returned mapping is printed and written to
    ``outputPath + "task2.txt"``.
    """
    tokens = [token for line in t for token in line.split(" ")]
    term_counts = fileMK.unsortedDictFromWords(tokens)

    summary = "There are " + str(len(term_counts)) + " different Terms"
    print(summary)

    fileMK.writeTextToFile(summary, outputPath + "task2.txt")
def task1():
    """Task 1: write the first 50 entries of the corpus term ranking to a file.

    Every entry of ``t`` is split on single spaces and the tokens are passed
    to ``fileMK.sortedDictFromWords``.  The result is indexed by position;
    presumably it is a frequency-ordered sequence of ``(term, count)``
    tuples -- confirm against ``fileMK``.  Up to 50 entries are rendered one
    per line, the tuple punctuation is stripped (``,`` becomes ``:``), and
    the text is written to a path entered by the user.  An empty answer
    falls back to ``outputPath + "task1.txt"``.
    """
    tokens = [token for line in t for token in line.split(" ")]
    ranked = fileMK.sortedDictFromWords(tokens)

    # Bound the loop by the number of available entries: a corpus with fewer
    # than 50 distinct terms used to raise IndexError here.
    limit = min(50, len(ranked))
    listing = "".join(str(ranked[rank]) + "\n" for rank in range(limit))

    # Turn the "('term', 12)" rendering of each entry into "term: 12".
    for old, new in (("(", ""), (")", ""), ("'", ""), (",", ":")):
        listing = listing.replace(old, new)

    path = input(" Enter Path & Filename (like \"D:\\terms.txt\")\n")
    if path == "":
        path = outputPath + "task1.txt"
    fileMK.writeTextToFile(listing, path)
def task4():
    """Task 4: run four regular-expression searches over the whole corpus.

    All entries of ``t`` are joined (one per line) and searched for:

    a) parenthesised runs of at least three non-space characters,
    b) two or more consecutive upper-case words (each at least two letters),
    c) numbers introduced by ``#`` (digits and commas),
    d) hyphenated terms with at least three letters on each side.

    For each search the number of hits is printed and the list of matches is
    written to ``outputPath + "task4a.txt"`` .. ``"task4d.txt"``.

    NOTE(review): a second ``task4`` is defined later in this file; if both
    live in the same module, the later definition shadows this one.
    NOTE(review): the headline of (a) says "More than 3" while the pattern
    accepts exactly three characters as well -- confirm which is intended.
    """
    text = "".join(line + "\n" for line in t)
    lowered = text.lower()

    # (headline, pattern, text to search, summary suffix, output file)
    searches = (
        (
            " More than 3 Non-Space Characters in Brackets.",
            r"[(]\S{3,}[)]",
            lowered,
            " Brackets found",
            "task4a.txt",
        ),
        (
            " More than 1 consecutive Words in Uppercase.",
            # Must run on the original-case text: searching the lower-cased
            # copy could never match [A-Z].  The repeated part is a
            # non-capturing group; the former "[\s[A-Z]{2,}]*" was parsed as
            # a character class followed by literal "]" characters.
            r"[A-Z]{2,}(?:\s[A-Z]{2,})+",
            text,
            " Words found",
            "task4b.txt",
        ),
        (
            " Numbers starting with \"#\".",
            r"#[0-9,]+",
            lowered,
            " Numbers found",
            "task4c.txt",
        ),
        (
            " Terms containing a Hyphen.",
            r"[a-zA-Z]{3,}[-][a-zA-Z]{3,}",
            lowered,
            " Terms found",
            "task4d.txt",
        ),
    )

    for headline, pattern, haystack, summary, filename in searches:
        print(headline)
        matches = re.findall(pattern, haystack)
        print(str(len(matches)) + summary)
        fileMK.writeTextToFile(str(matches), outputPath + filename)
def task4():
    """Task 4: record what follows the terms "can" and "general".

    For each of the two terms, ``fileMK.get2FollowingTerms(t, term)`` is
    queried and every returned entry is written on its own line, prefixed
    with the term and a space, to ``outputPath + "task4-can.txt"`` and
    ``outputPath + "task4-general.txt"`` respectively.
    """
    targets = (
        ("can", "task4-can.txt"),
        ("general", "task4-general.txt"),
    )
    for term, filename in targets:
        followers = fileMK.get2FollowingTerms(t, term)
        lines = "".join(
            term + " " + follower + "\n" for follower in followers
        )
        fileMK.writeTextToFile(lines, outputPath + filename)
def task5():
    """Task 5: list the first 10 ranked continuations of "can" and "general".

    Reads ``task4-can.txt`` and ``task4-general.txt`` from ``outputPath``
    (the files the "can"/"general" variant of task 4 writes), ranks their
    lines with ``fileMK.sortedDictFromWords`` and writes up to ten entries,
    with the leading term removed, to ``task5-can.txt`` and
    ``task5-general.txt``.  The ranking is indexed by position; presumably
    it is a frequency-ordered sequence of ``(line, count)`` tuples --
    confirm against ``fileMK``.
    """
    jobs = (
        ("can", "task4-can.txt", "task5-can.txt"),
        ("general", "task4-general.txt", "task5-general.txt"),
    )
    for term, source, target in jobs:
        phrases = fileMK.readFileToArray(outputPath + source)
        ranked = fileMK.sortedDictFromWords(phrases)
        prefix = term + " "

        # Bound by the number of available entries: fewer than 10 distinct
        # phrases used to raise IndexError.
        limit = min(10, len(ranked))

        # Remove only the first occurrence (the leading term).  Replacing
        # every occurrence also mangled later words that merely contain the
        # prefix, e.g. "american " contains "can ".
        report = "".join(
            str(ranked[rank]).replace(prefix, "", 1) + "\n"
            for rank in range(limit)
        )
        fileMK.writeTextToFile(report, outputPath + target)
def task3():
    """Task 3: list the terms that occur exactly a user-chosen number of times.

    The corpus ``t`` is lower-cased, split on single spaces and counted with
    ``fileMK.unsortedDictFromWords``.  The user is asked for the wanted
    number of occurrences; a non-integer answer falls back to 51.  All terms
    whose count equals that number are joined with ``"; "``, the number of
    hits is printed, and the list is written to
    ``outputPath + "task3-<count>.txt"``.
    """
    tokens = [token for line in t for token in line.lower().split(" ")]
    term_counts = fileMK.unsortedDictFromWords(tokens)

    try:
        wanted = int(input(" How many times should the term appear?\n"))
    except ValueError:
        print(" Invalid Value, assuming 51\n")
        wanted = 51

    matching = [term for term, count in term_counts.items() if count == wanted]
    listing = "".join(str(term) + "; " for term in matching)

    # Count the matches directly rather than re-splitting the rendered text,
    # which would miscount if a term ever contained the "; " separator.
    found = str(len(matching))
    print(" " + found + " Words found occuring " + str(wanted) + " time(s)")

    path = outputPath + "task3-" + str(wanted) + ".txt"
    fileMK.writeTextToFile(listing, path)
def task6():
    """Task 6: compare the leading terms of the Hamilton and Madison papers.

    Reads ``Federalist Hamilton.txt`` and ``Federalist Madison.txt`` from
    ``corpusPath``, lower-cases each text, splits it on single spaces and
    ranks the tokens with ``fileMK.sortedDictFromWords``.  The ranking is
    indexed by position; presumably it is a frequency-ordered sequence of
    ``(term, count)`` tuples -- confirm against ``fileMK``.  Up to 15
    entries per author are printed under an author heading and both
    sections are written to ``outputPath + "task6.txt"``.
    """
    sources = (
        ("Hamilton", "Federalist Hamilton.txt"),
        ("Madison", "Federalist Madison.txt"),
    )

    # Read both files before ranking, as the original did.
    texts = [fileMK.readFile(corpusPath + filename) for _, filename in sources]
    rankings = [
        fileMK.sortedDictFromWords(text.lower().split(" ")) for text in texts
    ]

    sections = []
    for (author, _), ranked in zip(sources, rankings):
        # Bound by the number of available entries: a text with fewer than
        # 15 distinct terms used to raise IndexError.
        limit = min(15, len(ranked))
        body = "".join(str(ranked[rank]) + "\n" for rank in range(limit))

        # Strip the tuple punctuation from the "('term', 12)" rendering.
        for char in ("(", ")", "'"):
            body = body.replace(char, "")
        sections.append(author + ":\n" + body)

    for section in sections:
        print(section)

    fileMK.writeTextToFile("\n".join(sections), outputPath + "task6.txt")