# Write the per-organization award counts and totals to a CSV file
with open("awards.csv", "w") as out_file:
    out_file.write("Org,# of Awards,Total Award Amount\n")
    for rec in award_recs:
        out_file.write(rec + "," + str(award_recs[rec][0]) + "," + str(award_recs[rec][1]) + "\n")

# Extract abstract ID from filename
abstractID = fileid.replace(".txt", "")
abstractID = re.split(r"[\/]", abstractID)
abstractID = abstractID[2]

# Reset counter for the number of sentences in the abstract
sent_num = 0

# Find the abstract by looping through the paragraphs, counting them, and applying the GetAbstract function
para_num = -1
for para in corpus.paras(fileid):
    para_num += 1
    GetAbstract()

# If the abstract is blank, add 1 to the distribution for 0 sentences
if abstract == []:
    if 0 not in sent_dist:
        sent_dist[0] = 1
    else:
        sent_dist[0] += 1

# Loop through each sentence in the abstract
for line in abstract:
    # Iterate the sentence counter
    sent_num += 1
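# The loop above calls GetAbstract(), which is assumed (it is not defined in this
# excerpt) to fill the global list `abstract` with the abstract's sentences.
# Below is a minimal sketch, not part of the original code, of how the sentence-count
# distribution could then be completed for non-empty abstracts, mirroring the
# 0-sentence case handled above.
if abstract != []:
    if sent_num not in sent_dist:
        sent_dist[sent_num] = 1
    else:
        sent_dist[sent_num] += 1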
import treetaggerwrapper

# 1) build a TreeTagger wrapper:
tagger = treetaggerwrapper.TreeTagger(TAGLANG='en', TAGDIR='D:/Programme/TreeTagger')
# 2) tag your text.
tags = tagger.TagText("This is a very short text to tag.")
# 3) use the tags list... (list of string output from TreeTagger).
print tags

# Check whether the format of the tagged postings is suitable for the tagged corpus reader
# (NLTK Cookbook, p. 51)
input_directory = path
from nltk.corpus.reader import TaggedCorpusReader
reader = TaggedCorpusReader(input_directory, r'.*\.txt')
reader.words()
# ['The', 'expense', 'and', 'time', 'involved', 'are', ...]
reader.tagged_words()
reader.sents()
reader.tagged_sents()
reader.paras()
reader.tagged_paras()

# Testing the import
#import sys
import myMath
print myMath.add(4, 5)
print myMath.division(4, 2)
print myMath.multiply(10, 5)
print myMath.fibonacci(8)
print myMath.squareroot(48)
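# TreeTagger's TagText() returns a list of strings of the form "word\tPOS\tlemma",
# whereas TaggedCorpusReader by default expects whitespace-separated "word/POS"
# tokens. The helper below is a minimal sketch (not part of the original notes;
# the function name and output path are illustrative) of one way to rewrite the
# tagged output into that format before pointing the reader at the directory.
def write_tagged_file(tags, out_path):
    with open(out_path, "w") as out:
        tokens = []
        for tag_line in tags:
            parts = tag_line.split("\t")
            if len(parts) >= 2:              # skip any malformed lines
                tokens.append(parts[0] + "/" + parts[1])
        out.write(" ".join(tokens) + "\n")

# Example (illustrative path):
# write_tagged_file(tags, input_directory + "/tagged_sample.txt")

# The myMath module imported above is not shown in these notes. The sketch below
# only guesses at its contents from the names and calls in the test code; the
# function bodies are assumptions.
#
# myMath.py
import math

def add(a, b):
    return a + b

def division(a, b):
    return a / b

def multiply(a, b):
    return a * b

def fibonacci(n):
    # return the n-th Fibonacci number iteratively (fibonacci(8) == 21)
    a, b = 0, 1
    for _ in range(n):
        a, b = b, a + b
    return a

def squareroot(x):
    return math.sqrt(x)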