def update_bigram_hash(self, word_array, bigrams_hash):
    """Add the bigrams of ``word_array`` to a running frequency table.

    Args:
        word_array: Sequence of tokens, passed unchanged to
            ``utils.generate_bigrams``.
        bigrams_hash: Dict mapping bigram -> occurrence count. It is
            updated in place: each bigram produced for ``word_array`` has
            its count incremented by one (starting from 1 if unseen).

    Returns:
        None. The result is the mutation of ``bigrams_hash``.
    """
    # NOTE(review): assumes utils.generate_bigrams returns an iterable of
    # hashable bigram keys -- confirm against utils.
    for key in utils.generate_bigrams(word_array):
        # Single lookup on the dict itself; avoids materialising the key
        # list with .keys() on every iteration.
        bigrams_hash[key] = bigrams_hash.get(key, 0) + 1
def identify_interaction(self, path_to_test_file):
    """Classify each ``<sentence>`` element of an XML file by its bigrams.

    For every ``sentence`` element, the ``text`` attribute is tokenized
    with ``utils.tokenize_string_without_punctuations`` and turned into
    bigrams with ``utils.generate_bigrams``. The first bigram that is a
    key of ``self.bigrams_based_on_score`` decides the sentence's label.

    Args:
        path_to_test_file: Path of the XML document to read.

    Returns:
        Dict mapping each sentence's ``id`` attribute to
        ``self.bigrams_based_on_score[bigram]`` for the first matching
        bigram, or to the string ``"false"`` when no bigram matches.

    Raises:
        KeyError: If a ``sentence`` element lacks a ``text`` or ``id``
            attribute.
        IOError/OSError: If the file cannot be opened.
    """
    result = {}
    known_bigrams = self.bigrams_based_on_score

    # The with-block releases the file handle even when parsing or
    # classification raises. All DOM access stays inside it because this
    # block cannot tell whether `parse` reads the stream eagerly.
    # NOTE(review): `parse` is presumably xml.dom.minidom.parse -- confirm
    # against the file's imports.
    with open(path_to_test_file, 'rb') as document_data:
        xml_data = parse(document_data)

        for sentence in xml_data.getElementsByTagName("sentence"):
            sentence_attrs = dict(sentence.attributes.items())
            sentence_text = sentence_attrs["text"]
            sentence_id = sentence_attrs["id"]

            # Child <entity> elements are not consulted: the label depends
            # only on the bigrams of the sentence text.
            words = utils.tokenize_string_without_punctuations(sentence_text)

            for bigram in utils.generate_bigrams(words):
                if bigram in known_bigrams:
                    result[sentence_id] = known_bigrams[bigram]
                    # Parenthesised single expression: prints the same text
                    # as a Python 2 print statement and as print().
                    print("bigram as a result of which classification is done:" + bigram)
                    # First match wins; later bigrams are ignored.
                    break
            else:
                # Loop finished without a break: no known bigram found.
                result[sentence_id] = "false"

    return result