def linguistic_features_vector(sentdict, aux, features):
    """Assemble the linguistic feature vector for one auxiliary.

    Each name found in ``features`` switches on one stage. The stages always
    run in the order below, regardless of how ``features`` is ordered:

    * ``'my_features'``, ``'my_rules'``, ``'old_rules'`` -- append the output
      of the matching ``*_vector`` helper.
    * ``'square_rules'`` -- append the conjunction of every unordered pair
      of entries collected so far.
    * ``'combine_aux_type'`` -- for every entry collected so far (pairwise
      conjunctions included), append its conjunction with each of the six
      auxiliary-type indicators (modal, be, have, do, to, so).

    Args:
        sentdict: Sentence object; only ``get_nltk_tree()`` is called here.
        aux: Auxiliary object; ``aux.type`` is read for
            ``'combine_aux_type'``.
        features: Container of stage names, probed with ``in``.

    Returns:
        list: The collected feature values.
    """
    parse = sentdict.get_nltk_tree()
    positions = nt.get_smallest_subtree_positions(parse)
    vector = []

    if 'my_features' in features:
        vector.extend(my_features_vector(sentdict, aux, parse, positions))

    if 'my_rules' in features:
        vector.extend(my_rules_vector(sentdict, aux, parse, positions))

    if 'old_rules' in features:
        vector.extend(old_rules_vector(sentdict, aux, parse, positions))

    if 'square_rules' in features:
        # Pair up only the entries that existed before this stage started.
        base = list(vector)
        width = len(base)
        vector.extend(
            bool_to_int(int_to_bool(base[a]) and int_to_bool(base[b]))
            for a in range(width)
            for b in range(a + 1, width)
        )

    if 'combine_aux_type' in features:
        base = list(vector)
        kind = aux.type
        type_flags = [
            kind == label
            for label in ('modal', 'be', 'have', 'do', 'to', 'so')
        ]
        vector.extend(
            bool_to_int(flag and int_to_bool(value))
            for value in base
            for flag in type_flags
        )

    return vector
def test_rules(self, train_auxs): f = lambda x: 1 if x else 0 predictions = [] for i in range(len(train_auxs)): aux = train_auxs[i] sendict = self.sentences[aux.sentnum] tree = sendict.get_nltk_tree() word_subtree_positions = nt.get_smallest_subtree_positions(tree) if aux.type == 'modal': predictions.append( f(wc.modal_rule(sendict, aux, tree, word_subtree_positions))) elif aux.type == 'be': predictions.append(f(wc.be_rule(sendict, aux))) elif aux.type == 'have': predictions.append(f(wc.have_rule(sendict, aux))) elif aux.type == 'do': predictions.append( f(wc.do_rule(sendict, aux, tree, word_subtree_positions))) elif aux.type == 'so': predictions.append(f(wc.so_rule(sendict, aux))) elif aux.type == 'to': predictions.append(f(wc.to_rule(sendict, aux))) return predictions
def linguistic_features_vector(sentdict, aux, features):
    """Build the feature list for ``aux`` from the requested feature groups.

    The groups named in ``features`` are processed in a fixed order:
    ``'my_features'``, ``'my_rules'`` and ``'old_rules'`` concatenate the
    output of their ``*_vector`` helper. ``'square_rules'`` then adds the
    AND of every pair of distinct existing entries (each pair once), and
    ``'combine_aux_type'`` finally adds the AND of every existing entry with
    each of six flags saying whether ``aux.type`` is modal, be, have, do, to
    or so.

    Args:
        sentdict: Sentence object providing ``get_nltk_tree()``.
        aux: Auxiliary object; its ``type`` attribute is read when
            ``'combine_aux_type'`` is requested.
        features: Container of group names (membership-tested only).

    Returns:
        list: Feature values in the order described above.
    """
    vector = []
    tree = sentdict.get_nltk_tree()
    subtree_positions = nt.get_smallest_subtree_positions(tree)
    helper_args = (sentdict, aux, tree, subtree_positions)

    if 'my_features' in features:
        vector += my_features_vector(*helper_args)
    if 'my_rules' in features:
        vector += my_rules_vector(*helper_args)
    if 'old_rules' in features:
        vector += old_rules_vector(*helper_args)

    if 'square_rules' in features:
        # Work from a copy so the products appended below are not
        # themselves paired up again.
        originals = vector[:]
        for pos, left in enumerate(originals):
            for right in originals[pos + 1:]:
                both = int_to_bool(left) and int_to_bool(right)
                vector.append(bool_to_int(both))

    if 'combine_aux_type' in features:
        originals = vector[:]
        aux_type = aux.type
        is_modal = aux_type == 'modal'
        is_be = aux_type == 'be'
        is_have = aux_type == 'have'
        is_do = aux_type == 'do'
        is_to = aux_type == 'to'
        is_so = aux_type == 'so'
        for value in originals:
            for matches in (is_modal, is_be, is_have, is_do, is_to, is_so):
                vector.append(bool_to_int(matches and int_to_bool(value)))

    return vector
def test_my_rules(self, original_rules=False, idxs=None): self.predictions = [] print "Length of test set: %d, length of All_auxs-training vectors: %d" % ( len(self.test_classes), len(self.all_auxiliaries) - len(self.train_vectors), ) for i in range(self.pre_oversample_length, len(self.all_auxiliaries)): if idxs == None or i in idxs: aux = self.all_auxiliaries.get_aux(i) sendict = self.sentences.get_sentence(aux.sentnum) tree = sendict.get_nltk_tree() word_subtree_positions = nt.get_smallest_subtree_positions(tree) if not original_rules: if aux.type == "modal": self.predictions.append( vc.bool_to_int(wc.modal_rule(sendict, aux, tree, word_subtree_positions)) ) elif aux.type == "be": self.predictions.append(vc.bool_to_int(wc.be_rule(sendict, aux))) elif aux.type == "have": self.predictions.append(vc.bool_to_int(wc.have_rule(sendict, aux))) elif aux.type == "do": self.predictions.append(vc.bool_to_int(wc.do_rule(sendict, aux, tree, word_subtree_positions))) elif aux.type == "so": self.predictions.append(vc.bool_to_int(wc.so_rule(sendict, aux))) elif aux.type == "to": self.predictions.append(vc.bool_to_int(wc.to_rule(sendict, aux))) else: auxidx = aux.wordnum if aux.type == "modal": self.predictions.append( vc.bool_to_int(dv.modalcheck(sendict, auxidx, tree, word_subtree_positions)) ) elif aux.type == "be": self.predictions.append( vc.bool_to_int(dv.becheck(sendict, auxidx, tree, word_subtree_positions)) ) elif aux.type == "have": self.predictions.append( vc.bool_to_int(dv.havecheck(sendict, auxidx, tree, word_subtree_positions)) ) elif aux.type == "do": self.predictions.append( vc.bool_to_int(dv.docheck(sendict, auxidx, tree, word_subtree_positions)) ) elif aux.type == "so": self.predictions.append( vc.bool_to_int(dv.socheck(sendict, auxidx, tree, word_subtree_positions)) ) elif aux.type == "to": self.predictions.append( vc.bool_to_int(dv.tocheck(sendict, auxidx, tree, word_subtree_positions)) )
def myfeaturesvector(sentdict, idx, features):
    """Compute the feature vector for the auxiliary at position ``idx``.

    The stages are enabled by names in ``features`` and run in this order:

    * ``"my_features"`` -- thirteen ``DV`` predicates about the auxiliary
      and its context.
    * ``"my_rules"`` -- six entries, one per auxiliary class (MODALS, BE,
      HAVE, DO, TO, SO): the lemma belongs to the class and the class's
      check passes.
    * ``"square_rules"`` -- the AND of every ordered pair of distinct
      entries collected so far, so each unordered pair appears twice.
    * ``"combine_aux_type"`` -- the AND of every entry collected so far
      with each of the six class-membership flags.

    Args:
        sentdict: Mapping with at least ``"tree"``, ``"lemmas"`` and
            ``"words"`` entries.
        idx: Position of the auxiliary within the sentence.
        features: Container of stage names, probed with ``in``.

    Returns:
        list: Values produced by ``truth`` for each feature.
    """
    tree = NT.maketree(sentdict["tree"][0])
    subtrees = NT.getsmallestsubtrees(tree)
    subtree_positions = NT.get_smallest_subtree_positions(tree, subtree_list=subtrees)
    aux = sentdict["lemmas"][idx]
    tree_args = (sentdict, idx, tree, subtree_positions)
    vector = []

    if "my_features" in features:
        # The list literal evaluates its items top to bottom, so the
        # predicates still run in their original order.
        vector.extend([
            truth(DV.auxccommandsverb(*tree_args)),
            truth(DV.auxccommandsverbthatcomesafter(*tree_args)),
            truth(DV.auxisccommandedbyverb(*tree_args)),
            truth(DV.auxislocallyccommandedbyverb(*tree_args)),
            truth(DV.auxlocallyccommandsverb(*tree_args)),
            truth(DV.isccommandedbycontinuationword(*tree_args)),
            truth(DV.nexttopunct(*tree_args)),
            truth(DV.isfollowedbypunct(sentdict, idx, end=["."])),
            truth(DV.previouswordisasorsoorthan(sentdict["words"], idx)),
            truth(DV.thesamecheck(sentdict["words"], idx)),
            truth(DV.toprecedesaux(sentdict, idx)),
            truth(DV.verbfollowsaux(sentdict, idx)),
            # TODO: added this new feature!
            truth(DV.nextwordistoo(sentdict, idx)),
        ])

    if "my_rules" in features:
        # ``and`` short-circuits: a check only runs for its own class.
        vector.extend([
            truth(aux in DV.MODALS and DV.modalcheck(*tree_args)),
            truth(aux in DV.BE and DV.becheck(*tree_args)),
            truth(aux in DV.HAVE and DV.havecheck(*tree_args)),
            truth(aux in DV.DO and DV.docheck(*tree_args)),
            truth(aux in DV.TO and DV.tocheck(*tree_args)),
            truth(aux in DV.SO and DV.socheck(*tree_args)),
        ])

    # This adds a new layer of features by combining all of the ones I had.
    if "square_rules" in features:
        base = list(vector)
        count = len(base)
        vector.extend(
            truth(untruth(base[i]) and untruth(base[j]))
            for i in range(count)
            for j in range(count)
            if i != j
        )

    if "combine_aux_type" in features:
        type_flags = [aux in DV.MODALS, aux in DV.BE, aux in DV.HAVE,
                      aux in DV.DO, aux in DV.TO, aux in DV.SO]
        base = list(vector)
        vector.extend(
            truth(untruth(value) and flag)
            for value in base
            for flag in type_flags
        )

    return vector
def testmyrules(classifier, section_start, section_end): gs_vector = classifier.getgsdata(section_start, section_end) aux_start, aux_end = classifier.section_split[ section_start], classifier.section_split[section_end] my_rules_return_vector = [] count = 0 for sentdict in classifier.each_sentence.sentences: for i in range(0, len(sentdict['lemmas'])): word = sentdict['lemmas'][i] if isauxiliary(sentdict, i): count += 1 if aux_start < count <= aux_end: tree = NT.maketree(sentdict['tree'][0]) subtree_positions = NT.get_smallest_subtree_positions(tree) if word in MODALS: my_rules_return_vector.append( truth( modalcheck(sentdict, i, tree, subtree_positions)) ) #Todo: I modified these b/c they were incorrectly written. elif word in BE: my_rules_return_vector.append( truth(becheck(sentdict, i, tree, subtree_positions))) elif word in HAVE: my_rules_return_vector.append( truth( havecheck(sentdict, i, tree, subtree_positions))) elif word in DO: my_rules_return_vector.append( truth(docheck(sentdict, i, tree, subtree_positions))) elif word in TO: my_rules_return_vector.append( truth(tocheck(sentdict, i, tree, subtree_positions))) elif word in SO: my_rules_return_vector.append( truth(socheck(sentdict, i, tree, subtree_positions))) classifier.compare(gs_vector, my_rules_return_vector, section_start - 1, verbose=False)
def test_rules(self, train_auxs): f = lambda x: 1 if x else 0 predictions = [] for i in range(len(train_auxs)): aux = train_auxs[i] sendict = self.sentences[aux.sentnum] tree = sendict.get_nltk_tree() word_subtree_positions = nt.get_smallest_subtree_positions(tree) if aux.type == 'modal': predictions.append(f(wc.modal_rule(sendict, aux, tree, word_subtree_positions))) elif aux.type == 'be': predictions.append(f(wc.be_rule(sendict, aux))) elif aux.type == 'have': predictions.append(f(wc.have_rule(sendict, aux))) elif aux.type == 'do': predictions.append(f(wc.do_rule(sendict, aux, tree, word_subtree_positions))) elif aux.type == 'so': predictions.append(f(wc.so_rule(sendict, aux))) elif aux.type == 'to': predictions.append(f(wc.to_rule(sendict, aux))) return predictions
def testmyrules(classifier, section_start, section_end): gs_vector = classifier.getgsdata(section_start, section_end) aux_start,aux_end = classifier.section_split[section_start], classifier.section_split[section_end] my_rules_return_vector = [] count = 0 for sentdict in classifier.each_sentence.sentences: for i in range(0,len(sentdict['lemmas'])): word = sentdict['lemmas'][i] if isauxiliary(sentdict, i): count += 1 if aux_start < count <= aux_end: tree = NT.maketree(sentdict['tree'][0]) subtree_positions = NT.get_smallest_subtree_positions(tree) if word in MODALS: my_rules_return_vector.append(truth(modalcheck(sentdict, i, tree, subtree_positions))) #Todo: I modified these b/c they were incorrectly written. elif word in BE: my_rules_return_vector.append(truth(becheck(sentdict, i, tree, subtree_positions))) elif word in HAVE: my_rules_return_vector.append(truth(havecheck(sentdict, i, tree, subtree_positions))) elif word in DO: my_rules_return_vector.append(truth(docheck(sentdict, i, tree, subtree_positions))) elif word in TO: my_rules_return_vector.append(truth(tocheck(sentdict, i, tree, subtree_positions))) elif word in SO: my_rules_return_vector.append(truth(socheck(sentdict, i, tree, subtree_positions))) classifier.compare(gs_vector, my_rules_return_vector, section_start-1, verbose=False)
def myfeaturesvector(sentdict, idx, features):
    """Return the feature list for the auxiliary lemma at ``idx``.

    The tree is built from ``sentdict['tree'][0]``; its smallest subtrees
    and their positions are computed once and shared by all tree-based
    predicates. The names in ``features`` select the stages, which run in
    this order:

    * ``'my_features'`` -- thirteen contextual ``DV`` predicates.
    * ``'my_rules'`` -- for each of MODALS, BE, HAVE, DO, TO, SO: whether
      the lemma is in that class and the class's check succeeds.
    * ``'square_rules'`` -- the AND of every ordered pair of different
      positions among the entries present when the stage starts.
    * ``'combine_aux_type'`` -- the AND of every entry present when the
      stage starts with each of the six class-membership flags.

    Args:
        sentdict: Mapping providing ``'tree'``, ``'lemmas'`` and ``'words'``.
        idx: Index of the auxiliary in the sentence.
        features: Container of stage names (membership-tested only).

    Returns:
        list: One ``truth(...)`` value per feature.
    """
    vector = []

    def emit(value):
        # Single place where raw predicate results are converted and stored.
        vector.append(truth(value))

    tree = NT.maketree(sentdict['tree'][0])
    subtrees = NT.getsmallestsubtrees(tree)
    subtree_positions = NT.get_smallest_subtree_positions(
        tree, subtree_list=subtrees)
    aux = sentdict['lemmas'][idx]

    if 'my_features' in features:
        emit(DV.auxccommandsverb(sentdict, idx, tree, subtree_positions))
        emit(DV.auxccommandsverbthatcomesafter(
            sentdict, idx, tree, subtree_positions))
        emit(DV.auxisccommandedbyverb(sentdict, idx, tree, subtree_positions))
        emit(DV.auxislocallyccommandedbyverb(
            sentdict, idx, tree, subtree_positions))
        emit(DV.auxlocallyccommandsverb(
            sentdict, idx, tree, subtree_positions))
        emit(DV.isccommandedbycontinuationword(
            sentdict, idx, tree, subtree_positions))
        emit(DV.nexttopunct(sentdict, idx, tree, subtree_positions))
        emit(DV.isfollowedbypunct(sentdict, idx, end=['.']))
        emit(DV.previouswordisasorsoorthan(sentdict['words'], idx))
        emit(DV.thesamecheck(sentdict['words'], idx))
        emit(DV.toprecedesaux(sentdict, idx))
        emit(DV.verbfollowsaux(sentdict, idx))
        # TODO: added this new feature!
        emit(DV.nextwordistoo(sentdict, idx))

    if 'my_rules' in features:
        # Short-circuit ``and``: each check runs only for its own class.
        emit(aux in DV.MODALS
             and DV.modalcheck(sentdict, idx, tree, subtree_positions))
        emit(aux in DV.BE
             and DV.becheck(sentdict, idx, tree, subtree_positions))
        emit(aux in DV.HAVE
             and DV.havecheck(sentdict, idx, tree, subtree_positions))
        emit(aux in DV.DO
             and DV.docheck(sentdict, idx, tree, subtree_positions))
        emit(aux in DV.TO
             and DV.tocheck(sentdict, idx, tree, subtree_positions))
        emit(aux in DV.SO
             and DV.socheck(sentdict, idx, tree, subtree_positions))

    # This adds a new layer of features by combining all of the ones I had.
    if 'square_rules' in features:
        snapshot = list(vector)
        for i, first in enumerate(snapshot):
            for j, second in enumerate(snapshot):
                if i != j:
                    emit(untruth(first) and untruth(second))

    if 'combine_aux_type' in features:
        bools = [
            aux in DV.MODALS, aux in DV.BE, aux in DV.HAVE, aux in DV.DO,
            aux in DV.TO, aux in DV.SO
        ]
        for value in list(vector):
            for flag in bools:
                emit(untruth(value) and flag)

    return vector
def test_my_rules(self, original_rules=False, idxs=None): self.predictions = [] print 'Length of test set: %d, length of All_auxs-training vectors: %d' % ( len(self.test_classes), len(self.all_auxiliaries) - len(self.train_vectors)) for i in range(self.pre_oversample_length, len(self.all_auxiliaries)): if idxs == None or i in idxs: aux = self.all_auxiliaries.get_aux(i) sendict = self.sentences.get_sentence(aux.sentnum) tree = sendict.get_nltk_tree() word_subtree_positions = nt.get_smallest_subtree_positions( tree) if not original_rules: if aux.type == 'modal': self.predictions.append( vc.bool_to_int( wc.modal_rule(sendict, aux, tree, word_subtree_positions))) elif aux.type == 'be': self.predictions.append( vc.bool_to_int(wc.be_rule(sendict, aux))) elif aux.type == 'have': self.predictions.append( vc.bool_to_int(wc.have_rule(sendict, aux))) elif aux.type == 'do': self.predictions.append( vc.bool_to_int( wc.do_rule(sendict, aux, tree, word_subtree_positions))) elif aux.type == 'so': self.predictions.append( vc.bool_to_int(wc.so_rule(sendict, aux))) elif aux.type == 'to': self.predictions.append( vc.bool_to_int(wc.to_rule(sendict, aux))) else: auxidx = aux.wordnum if aux.type == 'modal': self.predictions.append( vc.bool_to_int( dv.modalcheck(sendict, auxidx, tree, word_subtree_positions))) elif aux.type == 'be': self.predictions.append( vc.bool_to_int( dv.becheck(sendict, auxidx, tree, word_subtree_positions))) elif aux.type == 'have': self.predictions.append( vc.bool_to_int( dv.havecheck(sendict, auxidx, tree, word_subtree_positions))) elif aux.type == 'do': self.predictions.append( vc.bool_to_int( dv.docheck(sendict, auxidx, tree, word_subtree_positions))) elif aux.type == 'so': self.predictions.append( vc.bool_to_int( dv.socheck(sendict, auxidx, tree, word_subtree_positions))) elif aux.type == 'to': self.predictions.append( vc.bool_to_int( dv.tocheck(sendict, auxidx, tree, word_subtree_positions)))