def save_transition_probs(input_file):
    """
    Computes and stores trigrams and their respective transition
    probabilities from an input file containing the trigrams
    """
    # read counts file
    counter = Hmm(3)
    counter.read_counts(file('ner_rare.counts'))

    out_lines_list = []
    l = input_file.readline()
    while l:
        line = l.strip()
        if line:  # Nonempty line
            trigram = tuple(line.split())
            # get transition probability of trigram
            prob = compute_transition_prob(
                counter.ngram_counts[1][(trigram[0], trigram[1])],
                counter.ngram_counts[2][trigram])
            # get log probability
            log_prob = math.log(prob)
            l = line + " " + str(log_prob)
            out_lines_list.append(l)
        l = input_file.readline()

    out_lines = "\n".join(out_lines_list)
    # write trigrams and their log probs to file
    with open('5_1.txt', 'w') as out_file:
        out_file.write(out_lines)
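# compute_transition_prob is not shown in this snippet; a minimal sketch of what
# its use above implies (assumption: the maximum-likelihood transition estimate
# q(y_i | y_{i-2}, y_{i-1}) = count(trigram) / count(bigram)):
def compute_transition_prob(bigram_count, trigram_count):
    # MLE transition probability of the trigram given its bigram context
    return float(trigram_count) / bigram_count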
class Tagger:

    def __init__(self, common_file, counts_file):
        self.common_words = get_common_words(common_file)
        self.hmm = Hmm(3)
        self.hmm.read_counts(counts_file)
def replace_rare(raw_data_file, raw_count_file, output_file, rare_counts=5):
    # read in the raw counts from hmm
    fp = open(raw_count_file, 'r')
    hmm = Hmm(3)
    hmm.read_counts(fp)
    fp.close()

    # accumulate the word counts from emission_counts
    word_count = defaultdict(int)
    for word_tag in hmm.emission_counts:
        word_count[word_tag[0]] += hmm.emission_counts[word_tag]
    rare_words = set([word for word in word_count if word_count[word] < rare_counts])
    #print rare_words

    # replace rare words with _RARE_
    input = open(raw_data_file, 'r')
    output = open(output_file, 'w')
    for line in input:
        line = line.strip()
        if line:
            word, tag = line.split(" ")
            if word in rare_words:
                word_class = get_word_class(word)
                output.write(" ".join([word_class, tag]))
                #output.write(" ".join(['_RARE_', tag]))
            else:
                output.write(line)
        output.write("\n")
    input.close()
    output.close()
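# get_word_class is defined elsewhere; a minimal sketch, assuming it buckets a
# rare word into an informative pseudo-word class and falls back to _RARE_
# (the class names are illustrative, mirroring another snippet in this collection):
def get_word_class(word):
    if word.isdigit():
        return '_DIGIT_'
    elif word.isupper():
        return '_UPPER_'
    elif not word.isalpha():
        return '_NOTALPHA_'
    else:
        return '_RARE_'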
def gen_counts(input_path, output_path):
    if exists(output_path):
        return
    print 'Generating counts from: "%s"' % input_path
    counter = Hmm(3)
    counter.train(open(input_path, 'r'))
    counter.write_counts(open(output_path, 'w'))
def __init__(self, infile="ner_train.dat"):
    self.counter = Hmm(3)
    with open(infile) as f:
        self.counter.train(f)
    self.unigrams = {k[0]: v for k, v in self.counter.ngram_counts[0].iteritems()}  # since the key is a one-word tuple
    self.bigrams = self.counter.ngram_counts[1]
    self.trigrams = self.counter.ngram_counts[2]
    self.words = [x[0] for x in self.counter.emission_counts.keys()]
class Hmm(object):

    def __init__(self, counts_file="gene.counts"):
        self.hmm = Hmm()
        self.hmm.read_counts(counts_file)

    def emission(self, x, y):
        pass
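# The emission method above is a stub; a minimal sketch of the usual MLE
# emission parameter e(x | y) = count(y ~> x) / count(y), computed from the
# wrapped counter's counts (an assumption about the intended behaviour):
def emission(self, x, y):
    count_xy = self.hmm.emission_counts.get((x, y), 0.0)
    count_y = self.hmm.ngram_counts[0].get((y,), 0)
    return count_xy / count_y if count_y else 0.0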
def problem4(count_file, dev_file):
    """Implement a simple named entity tagger and output predictions."""
    try:
        infile = file(count_file, "r")
    except IOError:
        sys.stderr.write("ERROR: Cannot read inputfile %s.\n" % count_file)
        sys.exit(1)

    # Initialize a trigram counter
    counter = Hmm(3)
    # Read counts
    counter.read_counts(infile)
    # Write the predictions
    counter.write_predicts(dev_file, sys.stdout)
def baseline_tagger(counts_file, dev_file, rare_symbol="_RARE_"):
    """
    Implements a baseline tagger that uses only the emission probabilities
    to assign tags, and stores the result in a file.
    """
    # get frequently occurring words
    word_count_dict = get_word_counts(file('ner_train.dat'))
    freq_words = [word for word in word_count_dict if word_count_dict[word] >= 5]

    # compute emission probs
    counter = Hmm(3)
    counter.read_counts(counts_file)
    emission_probs = compute_emission_probs(counter.emission_counts,
                                            counter.ngram_counts[0])

    out_lines_list = []
    l = dev_file.readline()
    while l:
        word = l.strip()
        if word:  # Nonempty line
            # use emission probabilities of rare_symbol to assign the tag and its
            # probability for rare or unseen words.
            if word not in freq_words:
                tag = sorted(emission_probs[rare_symbol],
                             key=emission_probs[rare_symbol].get,
                             reverse=True)[0]
                prob = emission_probs[rare_symbol][tag]
            # use emission probabilities of the word itself for frequently occurring words.
            else:
                tag = sorted(emission_probs[word],
                             key=emission_probs[word].get,
                             reverse=True)[0]
                prob = emission_probs[word][tag]
            log_prob = math.log(prob, 2)
            l = word + " " + tag + " " + str(log_prob)
        else:
            l = ""
        out_lines_list.append(l)
        l = dev_file.readline()

    out_lines = "\n".join(out_lines_list)
    out_lines = out_lines + "\n"
    # write words, corresponding tags and log probs to file
    with open('4_2.txt', 'w') as out_file:
        out_file.write(out_lines)
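# compute_emission_probs is defined elsewhere; a minimal sketch, assuming it
# builds a nested mapping word -> tag -> e(word | tag) from the emission counts
# and the unigram tag counts:
def compute_emission_probs(emission_counts, unigram_counts):
    emission_probs = {}
    for (word, tag), count in emission_counts.iteritems():
        emission_probs.setdefault(word, {})[tag] = count / float(unigram_counts[(tag,)])
    return emission_probs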
#! /usr/bin/python

# __author__ = "Xiaochen Wei <*****@*****.**>"
__date__ = "$Sep 20, 2014"

from dataClean import *
from count_freqs import Hmm
import math

# the counts file computed from the training data
trainingDataFilePath = "ner.counts"

hmm = Hmm(3)
inputFile = open(trainingDataFilePath, "r")
hmm.read_counts(inputFile)


class SimpleNamedEntityTagger:

    '''
    get the Emission Parameter

    INPUT: the target word, and the tag (status) of the target word
    ======================
    RETURN: the emission of a target for a specific targetType
    '''
    def GetEmissionParameters(self, target, targetType):
        sumCount = 0
        count = 0
        if target not in [key[0] for key in hmm.emission_counts.keys()]:
import sys
from collections import defaultdict
import math
from count_freqs import Hmm

"""
Implement the Viterbi algorithm to compute argmax (y1...yn) p(x1...xn, y1...yn).
Your tagger should have the same basic functionality as the baseline tagger.
Instead of emission probabilities, the third column should contain the
log-probability of the tagged sequence up to this word.
"""

if __name__ == "__main__":

    if len(sys.argv) != 3:  # Expect exactly two arguments: the counts file and dev file
        usage()
        sys.exit(2)

    try:
        counts_file = file(sys.argv[1], "r")
    except IOError:
        sys.stderr.write("ERROR: Cannot read inputfile %s.\n" % sys.argv[1])
        sys.exit(1)

    counter = Hmm(3)
    # Read counts
    counter.read_counts(counts_file)
    counter.viterbi_read(sys.argv[2])
        if word in infreq_words:
            if word.isupper():
                f2.write("_UPPER_" + " " + parts[1] + "\n")
            elif word.isdigit():
                f2.write("_DIGIT_" + " " + parts[1] + "\n")
            elif not word.isalpha():
                f2.write("_NOTALPHA_" + " " + parts[1] + "\n")
            else:
                f2.write("_RARE_" + " " + parts[1] + "\n")
        else:
            f2.write(line)
    f2.close()


def usage():
    print """
    python add_class.py [count_file] [training_data]
    """


if __name__ == "__main__":

    if len(sys.argv) != 3:  # Expects two arguments: original count file and training data file
        usage()
        sys.exit(2)

    counter = Hmm(3)
    # finds count information for words in file
    (em_count, ngram_count, infreq_word_set, all_tags, all_words) = counter.read_counts(sys.argv[1])
    # produces new file with _RARE_
    replace_class(sys.argv[2], infreq_word_set)
class Tagger(object):

    def __init__(self, infile="ner_train.dat"):
        self.counter = Hmm(3)
        with open(infile) as f:
            self.counter.train(f)
        self.unigrams = {k[0]: v for k, v in self.counter.ngram_counts[0].iteritems()}  # since the key is a one-word tuple
        self.bigrams = self.counter.ngram_counts[1]
        self.trigrams = self.counter.ngram_counts[2]
        self.words = [x[0] for x in self.counter.emission_counts.keys()]

    """
    conditional probability that the word maps to tag, given the number of
    times the tag occurs
    """
    def compute_emission(self, word, tag):
        em = self.counter.emission_counts
        if tag == '*':
            return 0
        if (word, tag) in em:
            return em[(word, tag)] / float(self.unigrams[tag])
        elif word in self.words:
            return 0
        else:
            return em[('_RARE_', tag)] / float(self.unigrams[tag])

    """
    returns the trigram count over the bigram count, defaulting the dict gets
    so that there aren't division by 0 errors
    """
    def compute_trigram(self, yi, y1, y2):
        return self.trigrams.get((y2, y1, yi), 0) / float(self.bigrams.get((y2, y1), 1))

    """
    basic file replacement, writes to a new file called {infile}-rare where infile
    is provided. Can pass a threshold below which a word's count is considered "rare"
    """
    def replace_rare(self, infile, threshold=5):
        wordcounts = defaultdict(int)
        for tup in self.counter.emission_counts.iteritems():
            wordcounts[tup[0][0]] += tup[1]  # aggregates counts of words total, with any tag
        common_words = [k for k, v in wordcounts.iteritems() if v >= threshold]
        replaced = 0
        f = open(infile)
        f2 = open(infile.replace('.dat', '-rare.dat'), 'w')
        for line in f:
            if len(line.split(' ')) == 2:
                if line.split(' ')[0] not in common_words:
                    # closed set, there are more rare than not rare, we know it's one or the other
                    f2.write(line.replace(line.split(' ')[0], '_RARE_', 1))
                    replaced += 1
                else:
                    f2.write(line)
            else:
                f2.write(line)  # maintain stops
        f.close()
        f2.close()

    """
    returns a dictionary of relative probabilities for emission counts
    """
    def tag_probabilities(self, word):
        counts = {tag: self.compute_emission(word, tag) for tag in self.unigrams}
        prob = lambda v: v / sum(counts.values()) if sum(counts.values()) != 0 else 0
        return {k: prob(v) for k, v in counts.iteritems()}

    """
    wrapper function for dynamic programming algorithm, writes to outfile
    """
    def viterbi(self, infile, outfile):
        def write_to_pred_file(f, sentence):
            tag_seq = [" ".join(x) for x in self.tag_sequence(sentence)]  # tuples of tag, probability
            for word, tag in itertools.izip(sentence, tag_seq):
                # word, tag, probability
                f.write('%s %s\n' % (word, tag))
            f.write('\n')

        with open(infile) as f, open(outfile, "w") as f2:
            sentence = []
            for line in f:
                if line == '\n':
                    write_to_pred_file(f2, sentence)
                    sentence = []
                    continue
                else:
                    sentence.append(line.strip())
            # write the last sentence to the file (if there is no trailing newline);
            # will just return and escape if sentence is empty
            write_to_pred_file(f2, sentence)

    def tag_sequence(self, sentence):
        if len(sentence) == 0:
            return []
        possible_tags = self.unigrams.keys()
        possible_tags.append('*')
        bp = {i: {} for i in range(len(sentence) + 1)}
        # initialization: pi(0,'*','*') = 1, pi(0,u,v) = 0
        bp[0] = {t: ('O', 0) for t in itertools.product(possible_tags, repeat=2)}
        bp[0][('*', '*')] = (1.0, 1.0)
        # at idx 1, u can only be *
        for v in possible_tags:
            tag_max = ('sentinel', -1)  # a real probability (since logs are only computed at the end) will never be negative, so this will be reset
            tags = {}
            for w in possible_tags:
                tags[w] = bp[0][(w, '*')][1] * self.compute_trigram(v, w, '*') * self.compute_emission(sentence[0], v)
                if tags[w] > tag_max[1] and tags[w] != 0:
                    tag_max = (w, tags[w])
            bp[1][('*', v)] = tag_max if tag_max != ('sentinel', -1) else ('O', 0)  # default tag is no tag, so O -- no sequences with this u,v have a nonzero probability

        for i, word in enumerate(sentence[1:], start=2):  # from 2...n
            for v, u in itertools.product(possible_tags, repeat=2):  # same as nested for u in K, v in K
                tag_max = ('sentinel', -1)
                tags = {}
                for w in possible_tags:
                    if (w, u) in bp[i - 1]:
                        tags[w] = bp[i - 1][(w, u)][1] * self.compute_trigram(v, u, w) * self.compute_emission(word, v)
                        if tags[w] > tag_max[1] and tags[w] != 0:
                            tag_max = (w, tags[w])
                bp[i][(u, v)] = tag_max if tag_max != ('sentinel', -1) else ('O', 0)

        n = len(sentence)
        last = {(u, v): bp[n][(u, v)][1] * self.compute_trigram('STOP', v, u) for u, v in bp[n].keys()}
        yn1, yn = max(last, key=last.get)  # max probability for sequence ending in STOP
        conf = last[(yn1, yn)]
        seq = [(yn, str(ln(conf))), (yn1, str(ln(conf)))]  # sequence will be yn...y0
        for i in xrange(len(sentence) - 2, 0, -1):
            u, v = tuple(x[0] for x in reversed(seq[-2:]))  # previous two are yn-1, yn-2
            prev = bp[i + 2][(u, v)]
            seq.append((prev[0], str(ln(prev[1]))))
        return reversed(seq)  # reversed yn...y0 is y0...yn
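# A minimal usage sketch for the Tagger class above (file names are illustrative;
# assumption: the tagger used for decoding is rebuilt from the _RARE_-substituted
# training file so that the ('_RARE_', tag) emission counts it relies on exist):
if __name__ == "__main__":
    Tagger("ner_train.dat").replace_rare("ner_train.dat")  # writes ner_train-rare.dat
    tagger = Tagger("ner_train-rare.dat")
    tagger.viterbi("ner_dev.dat", "ner_dev.out")  # writes "word tag log-prob" lines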
        sys.stdout.write(line)


if __name__ == "__main__":

    if len(sys.argv) != 3:  # Expect exactly two arguments: the counts and corresponding training data file
        usage()
        sys.exit(2)

    try:
        input = file(sys.argv[1], "r")
        output = sys.argv[2]
    except IOError:
        sys.stderr.write("ERROR: Cannot read inputfile %s.\n" % sys.argv[1])
        sys.exit(1)

    # Initialize a trigram counter
    counter = Hmm(3)
    # Read in counts
    counter.read_counts(input)
    # Filter words with count < 5
    low_words = dict((k, v) for k, v in counter.word_counts.iteritems() if v < 5)
    high_words = dict((k, v) for k, v in counter.word_counts.iteritems() if v > 5)
    # Replace each instance of word in low_words with _RARE_ in training set
    replace_all(output, low_words, '_RARE_')
if __name__ == "__main__": if len(sys.argv ) != 3: # Expect exactly one argument: the training data file usage() sys.exit(2) try: counts_file = file(sys.argv[1], "r") test_file = file(sys.argv[2], "r") except IOError: sys.stderr.write("ERROR: Cannot read inputfile %s.\n" % arg) sys.exit(1) # Initialize a trigram counter counter = Hmm(3) # Read in counts counter.read_counts(counts_file) # Iterate through words in test data and calculate the log probability of each tag. for line in test_file: word = line.strip() if word: # Nonempty line original_word = word # Check if word is absent in training set, if so, use _RARE_ if word not in counter.all_words or counter.word_counts[word] < 5: word = "_RARE_" # Initialize dict to hold emission values candidates = defaultdict(float)
if __name__ == "__main__": if len(sys.argv ) != 3: # Expect exactly one argument: the training data file usage() sys.exit(2) try: counts_file = file(sys.argv[1], "r") test_file = file(sys.argv[2], "r") except IOError: sys.stderr.write("ERROR: Cannot read inputfile %s.\n" % arg) sys.exit(1) # Initialize a trigram counter counter = Hmm(3) # Read in counts counter.read_counts(counts_file) # Iterate over all test sentences test_sent_iterator = sent_iterator(word_iterator(test_file)) for sentence in test_sent_iterator: # Viterbi Algorithm n = len(sentence) pad_sent = (2) * ["*"] pad_sent.extend(sentence) pad_sent.append("STOP") # Initialize
out_lines = "\n".join(out_lines_list) out_lines = out_lines + "\n" # write to file with open('5_2.txt', 'w') as out_file: out_file.write(out_lines) if __name__ == "__main__": os.system('python 4_1.py') os.system('python count_freqs.py ner_train_rare.dat > ner_rare.counts') # get frequent words word_count_dict = get_word_counts(file('ner_train.dat')) freq_words = [ word for word in word_count_dict if word_count_dict[word] >= 5 ] # get transition and emission probs counter = Hmm(3) counter.read_counts(file('ner_rare.counts')) transition_probs = compute_transition_probs(counter.ngram_counts[1], counter.ngram_counts[2]) emission_probs = compute_emission_probs(counter.emission_counts, counter.ngram_counts[0]) # store tagged data with the log probs to file tagger(file('ner_dev.dat'), transition_probs, emission_probs, freq_words) os.system('python eval_ne_tagger.py ner_dev.key 5_2.txt')
def __read_counts(self, count_file):
    fp = open(count_file, 'r')
    hmm = Hmm(3)
    hmm.read_counts(fp)
    fp.close()
    return hmm
from collections import defaultdict
from count_freqs import Hmm
import math
import sys


def emission_probability(word, tag, emission_counts, ngram_counts):
    return emission_counts[(word, tag)] / ngram_counts[0][(tag,)]


if __name__ == "__main__":
    counts_file = open(sys.argv[1])
    sentences_file = open(sys.argv[2])

    hmm = Hmm()
    hmm.read_counts(counts_file)

    emission_counts = hmm.emission_counts
    ngram_counts = hmm.ngram_counts
    entity_tags = hmm.all_states

    trained_words = defaultdict(int)
    infrequent_words = defaultdict(int)

    for word, tag in emission_counts:
        trained_words[word] += hmm.emission_counts[(word, tag)]

    for word in trained_words:
        if trained_words[word] < 5:
            infrequent_words[word] = 1

    for word in infrequent_words:
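# A small usage sketch built on emission_probability above, assuming counts are
# loaded as in the snippet: pick the tag with the highest emission probability
# for a word (this mirrors the baseline taggers elsewhere in this collection):
def best_tag(word, emission_counts, ngram_counts, tags):
    # argmax over tags of e(word | tag); unseen (word, tag) pairs contribute 0
    scores = {tag: emission_counts.get((word, tag), 0.0) / ngram_counts[0][(tag,)]
              for tag in tags}
    return max(scores, key=scores.get)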
#!/usr/bin/python

import sys
from count_freqs import Hmm

countInput = file(sys.argv[1], "r")
hmm = Hmm(3)
hmm.read_counts(countInput)

# add an entry for each pseudo-word class under every tag
for tag in hmm.all_states:
    hmm.emission_counts[("_RARE_", tag)] = 0
    hmm.emission_counts[("_Numeric_", tag)] = 0
    hmm.emission_counts[("_AllCapitals_", tag)] = 0
    hmm.emission_counts[("_LastCapital_", tag)] = 0

# fold low-frequency (word, tag) counts into the matching pseudo-word class
for key, value in hmm.emission_counts.items():
    #print value
    if key[0] == "_RARE_":
        continue
    if value < 5:
        if key[0].isdigit():
            hmm.emission_counts[("_Numeric_", key[1])] += value
            #print "%s delete %i to Numeric %i" % (key, value, hmm.emission_counts[("_Numeric_", key[1])])
        elif key[0].isalpha() and key[0].isupper():
            hmm.emission_counts[("_AllCapitals_", key[1])] += value
            #print "%s delete %i to Capital %i" % (key, value, hmm.emission_counts[("_AllCapitals_", key[1])])
        elif key[0].isalpha() and key[0][-1].isupper():
            #elif key[0][-1].isupper():
            hmm.emission_counts[("_LastCapital_", key[1])] += value
            #print "%s delete %i to LastCapital %i" % (key, value, hmm.emission_counts[("_LastCapital_", key[1])])
        else:
else: return "I-GENE" def get_rare_words(d): temp_d = d.copy() O_words, GENE_words = set(), set() for key, value in d.iteritems(): if value < 5 and get_max_value(temp_d, key[0]) < 5: if get_most_tag(temp_d, key[0]) == "O": O_words.add(key[0]) else: GENE_words.add(key[0]) return (O_words, GENE_words) if __name__ == "__main__": counter = Hmm(3) counter.train(file("data/gene.train","r"), RARE=False) # print counter.emission_counts rare_words = get_rare_words(counter.emission_counts) # print len(rare_words[0]) #O_words = 19034 # print len(rare_words[1]) #GENE_words = 6231 with open("data/rare_words.pickle", "wb") as f: pickle.dump(rare_words, f)
    for line in f.readlines():
        word = line[:-1]
        if len(word) != 0:
            if word in keys:
                tag = viterbi(d[0], d[1], word)
            else:
                tag = viterbi(d[0], d[1], classify(word))
            output.write("%s %s\n" % (word, tag))
            d.append(tag)
        else:
            output.write("\n")
            d = deque(["*", "*"], maxlen=2)


if __name__ == "__main__":
    counter = Hmm(3)
    counter.read_counts(file("outputs/p3_count.txt", "r"))
    bigram_counts = counter.ngram_counts[1]
    trigram_counts = counter.ngram_counts[2]

    keys = set()
    for k in counter.emission_counts.keys():
        keys.add(k[0])

    # FOR THE DEVELOPMENT FILE
    write_tags("data/gene.dev", keys, file("outputs/gene_dev.p3.out", "w"))
    """
    TO EVALUATE, RUN:
    >>> python eval_gene_tagger.py data/gene.key outputs/gene_dev.p3.out

    AND THE OUTPUT WILL BE:
    Found 404 GENEs. Expected 642 GENEs; Correct: 214.
        max_prob = math.log(max_prob, 2)
        output_file.write("%s %s %f\n" % (word, max_tag, max_prob))


def usage():
    print """
    python simple_tagger.py [counts_file] [test_file] > [output_file]
    """


if __name__ == "__main__":

    if len(sys.argv) != 3:
        usage()
        sys.exit(2)

    try:
        counts_file = file(sys.argv[1], "r")
        test_file = file(sys.argv[2], "r")
    except IOError:
        sys.stderr.write("ERROR: Cannot read inputfiles %s or %s.\n" % (sys.argv[1], sys.argv[2]))
        sys.exit(1)

    # Initialize a trigram counter
    counter = Hmm(3)
    # Read counts
    counter.read_counts(counts_file)
    # Initialize a simple tagger
    tagger = SimpleTagger(counter)
    # Tag the data
    tagger.tag(test_file, sys.stdout)
def __init__(self, counts_file="gene.counts"): self.hmm = Hmm() self.hmm.read_counts(counts_file)
import sys
import operator
from collections import defaultdict
from count_freqs import Hmm


def p2_1emission(word, tag, hmm, countTag):
    #print "p2_1 " + word + " " + tag + " %i" % hmm.emission_counts[(word, tag)]
    if (word, tag) in hmm.emission_counts:
        return hmm.emission_counts[(word, tag)] / countTag[tag]
    else:
        return 0


if __name__ == "__main__":
    input = file(sys.argv[1], "r")
    model = Hmm(3)
    #print len(model.emission_counts)
    model.read_counts(input)
    #print len(model.emission_counts)
    #if ("BACKGROUND", "O") in model.emission_counts:
    #    print "yes"
    #print model.all_states

    testFile = file(sys.argv[2], "r")
    tagsNum = len(model.all_states)
    countTag = dict.fromkeys(model.all_states, 0)
    #print countTag
    for (word, tag) in model.emission_counts:
        countTag[tag] += model.emission_counts[(word, tag)]
    #print countTag
word = " ".join(fields[:-1]) # replace word with its category if frequency < count_thresh if word_count_dict[word] < count_thresh: line = " ".join([get_category(word), fields[-1]]) out_lines_list.append(line) l = in_file.readline() out_lines = "\n".join(out_lines_list) out_file.write(out_lines) if __name__ == "__main__": # replace infrequent words with categories and write to file replace_infrequent_words_with_categories(file('ner_train.dat'), file('ner_train_cats.dat', 'w')) # generate counts file os.system('python count_freqs.py ner_train_cats.dat > ner_cats.counts') # get frequent words word_count_dict = get_word_counts(file('ner_train.dat')) freq_words = [word for word in word_count_dict if word_count_dict[word] >= 5] # get transition and emission probabilities counter = Hmm(3) counter.read_counts(file('ner_cats.counts')) transition_probs = compute_transition_probs(counter.ngram_counts[1], counter.ngram_counts[2]) emission_probs = compute_emission_probs(counter.emission_counts, counter.ngram_counts[0]) # store tagged data with the log probs to file tagger(file('ner_dev.dat'), transition_probs, emission_probs, freq_words) os.system('python eval_ne_tagger.py ner_dev.key 6.txt')
from collections import defaultdict
from count_freqs import Hmm
import math
import sys


def emission_probability(word, tag, emission_counts, ngram_counts):
    return emission_counts[(word, tag)] / ngram_counts[0][(tag,)]


if __name__ == "__main__":
    counts_file = open(sys.argv[1])
    sentences_file = open(sys.argv[2])

    hmm = Hmm()
    hmm.read_counts(counts_file)

    emission_counts = hmm.emission_counts
    ngram_counts = hmm.ngram_counts
    entity_tags = hmm.all_states

    trained_words = defaultdict(int)
    infrequent_words = defaultdict(int)

    for word, tag in emission_counts:
        trained_words[word] += hmm.emission_counts[(word, tag)]

    for word in trained_words:
        if trained_words[word] < 5:
            infrequent_words[word] = 1
with open(file, "r") as f: f2 = open("ner_train_rare.dat", "w") for line in f: parts = line.strip().split(" ") word = parts[0] if word in infreq_words: f2.write("_RARE_" + " " + parts[1] + "\n") else: f2.write(line) f2.close() def usage(): print """ python add_rare.py [count_file] [training_data] """ if __name__ == "__main__": if len(sys.argv)!=3: # Expects two argument: original count file and training data file usage() sys.exit(2) counter = Hmm(3) # finds count information for words in file (em_count, ngram_count, infreq_word_set, all_tags, all_words) = counter.read_counts(sys.argv[1]) #produces new file with _RARE_ replace_rare(sys.argv[2], infreq_word_set)
""" if __name__ == "__main__": if len(sys.argv) < 4: # Expects atleast 3 arguments usage() sys.exit(2) try: input = file(sys.argv[1], "r") except IOError: sys.stderr.write("ERROR: Cannot read inputfile %s.\n" % arg) sys.exit(1) # Initialize a trigram counter counter = Hmm(3) if (len(sys.argv) == 4): #to obtain original counts (em_count1, ngram_count1, infreq_word1, all_tags1, all_words1) = counter.read_counts(sys.argv[3]) #to process new data (em_count, ngram_count, infreq_word, all_tags, all_words) = counter.read_counts(sys.argv[1]) #to obtain emission prob emission_probabilities = emission_parameters(sys.argv[2], em_count, ngram_count[0], all_tags, all_words1, infreq_word1) else: #to process new data (em_count, ngram_count, infreq_word, all_tags, all_words) = counter.read_counts(sys.argv[1])
def train_ngram_and_emission_freq_from_corpus_file(self, corpus_file):
    counter = Hmm(3)
    counter.train(corpus_file)
    self.emission_counts = counter.emission_counts
    self.ngram_counts = counter.ngram_counts
output2.flush()

###################################################
###################################################

print("\n2. Generate word count file.\n")

freqs_input = open('gene.replace.train', "r")
freqs_output = open('gene.counts', "w")

# Initialize a trigram counter
counter = Hmm(3)
# Collect counts
counter.train(freqs_input)
# Write the counts
counter.write_counts(freqs_output)
freqs_output.flush()

###################################################
###################################################

print("\n3. Tag dev corpus with Viterbi tagger.\n")
if __name__ == "__main__": if len(sys.argv ) != 3: # Expect exactly two arguments: the counts and trigram file usage() sys.exit(2) try: counts_file = file(sys.argv[1], "r") trigram_file = sys.argv[2] except IOError: sys.stderr.write("ERROR: Cannot read inputfile %s.\n" % arg) sys.exit(1) # Initialize a trigram counter counter = Hmm(3) # Read in counts counter.read_counts(count_file) # Iterate through trigrams in trigram_file and calculate the log probability of each trigram. for line in test_file: trigram = line.strip().split(" ") if trigram: # Nonempty line prob = counter.calc_mle(trigram) # Get the log of the probability log_prob = math.log(prob) # Write log probability to output file sys.stdout.write(