def parse_using_stanfordparser(tokenized_sent, display_tree=False, printNP=False, printLeave=False): result = stanford_parser.tagged_parse(tokenized_sent) for item in result: # print item if display_tree: Tree.draw(item) if printNP: NPs = list( Tree.subtrees( item, filter=lambda x: x.label() == 'NP' and x.height() <= 6)) for n in NPs: if printLeave: candidate = Tree.leaves(n) s = ' '.join(candidate) if len(candidate) == 1: if re.search(re.compile(r'[A-Z_-]+', re.X), s): print s else: print s else: tags = [] for t in Tree.subtrees(n): if t.label() not in ['NP', 'S', 'VP']: tags.append(t.label()) tagged = [] for w in range(len(Tree.leaves(n))): tagged.append( (Tree.leaves(n)[w], tags[w].encode('gbk'))) regexp_ner_m2(regexp_grammar, tagged)
def tree(): from nltk.tree import Tree print(Tree(1, [2, Tree(3, [4]), 5])) vp = Tree('VP', [Tree('V', ['saw']), Tree('NP', ['him'])]) s = Tree('S', [Tree('NP', ['I']), vp]) print(s) s.draw()
def visualize_sentence_tree(sentence_tree): processed_tree = process_sentence_tree(sentence_tree) processed_tree = [ Tree(item[0], [Tree(x[1], [x[0]]) for x in item[1]]) for item in processed_tree ] tree = Tree('S', processed_tree) tree.draw()
def visualize_sentence_tree(sentence_tree): processed_tree = process_sentence_tree(sentence_tree) processed_tree = [ Tree( item[0], [ Tree(x[1], [x[0]]) for x in item[1] ] ) for item in processed_tree ] tree = Tree('S', processed_tree ) tree.draw()
def translate(tree: Tree, translation_rules: list, draw=False): put_left = list( filter( lambda i: isinstance(tree[i], Tree) and tree[i].label() in translation_rules and not 'put_left' in locals(), tree.treepositions()))[0] if len(tree) != 0 else [] if len(put_left) != 0: tree = apply_translation(put_left, tree) if tree[tree.treepositions()[-2]].label() not in [ Nonterminal("AUX"), Nonterminal("VERB") ]: tree = apply_translation(tree.treepositions()[-2], tree) if draw: tree.draw() return tree
def read(parser, sentence=None, pos=None, critical=None, actions=actions, blind_actions=actions, word_freq=word_freq, label_freq=label_freq, strength_of_association={}, decmem={}, lexical=True, visual=True, syntactic=True, reanalysis=True, prints=True): """ Read a sentence. :param sentence: what sentence should be read (list). :param pos: what pos should be used (list, matching in length with sentence). :param actions: dataframe of actions :param lexical - should lexical information affect reading time? :param visual - should visual information affect reading time? :param syntactic - should syntactic information affect reading time? :param reanalysis - should reanalysis of parse affect reading time? """ parser.set_decmem(decmem) # TODO: dont remove?? tobe_removed = { i for i in range(len(sentence)) if (re.match("[:]+", sentence[i]) or sentence[i] == "'s") and i != len(sentence) - 1 } #remove what is not a full word print(sentence) for x in tobe_removed: print(sentence[x]) critical_rules = dict() # for critical sentences you can assume that specific rules apply to ensure that parsing of gp sentences proceeds correctly # for example for sentence 'the horse raced past the barn fell', the following would work on the noun horse (uncomment if using) #critical_rules = {'1': [['reduce_unary', 'NP_BAR'], ['reduce_binary', 'NP'], ['shift', "''"]]} if not lexical: for x in parser.decmem: parser.decmem.activations[ x] = 100 #this is to nulify the effect of word retrieval to almost 0 parser.retrievals = {} parser.set_retrieval("retrieval") parser.visbuffers = {} parser.goals = {} parser.set_goal("g") parser.set_goal(name="imaginal", delay=0) parser.set_goal(name="imaginal_reanalysis", delay=0) parser.set_goal("word_info") stimuli = [{} for i in range(len(sentence))] pos_word = 10 environment.current_focus = (pos_word + 7 + 7 * visual_effect(sentence[0], visual), 180) pos_words = [] for x in range(41): #this removes any move eyes created previously; we assume that no sentence is longer than 20 words parser.productionstring(name="move eyes" + str(x), string=""" =g> isa reading state dummy ==> =g> isa reading state dummy""") # create fixed rules for eye movements for i, word in enumerate(sentence): pos_word += 7 + 7 * visual_effect(word, visual) pos_words.append((pos_word, 180)) for j in range(len(stimuli)): stimuli[j].update({ i: { 'text': word, 'position': (pos_word, 180), 'vis_delay': visual_effect(word, visual) } }) if i < len(sentence) - 3: parser.productionstring(name="move eyes" + str(i), string=""" =g> isa reading state move_eyes position """ + str(i) + """ ?manual> preparation free processor free ==> =imaginal> isa action_chunk WORD_NEXT0_LEX """ + '"' + str(sentence[i + 2]) + '"' + """ WORD_NEXT0_POS """ + str(pos[i + 2]) + """ =g> isa reading state reading_word position """ + str(i + 1) + """ tag """ + str(pos[i + 1]) + """ ?visual_location> attended False +visual_location> isa _visuallocation screen_x """ + str(pos_word + 7 + 7 * visual_effect(sentence[i + 1], visual)) + """ screen_y 180 ~visual>""") elif i < len(sentence) - 2: parser.productionstring(name="move eyes" + str(i), string=""" =g> isa reading state move_eyes position """ + str(i) + """ ?manual> preparation free processor free ==> =imaginal> isa action_chunk WORD_NEXT0_LEX """ + '"' + str(sentence[i + 2]) + '"' + """ WORD_NEXT0_POS """ + str(pos[i + 2]) + """ =g> isa reading state reading_word position """ + str(i + 1) + """ tag """ + str(pos[i + 1]) + """ ?visual_location> attended False +visual_location> isa _visuallocation screen_x """ + str(pos_word + 7 + 7 * visual_effect(sentence[i + 1], visual)) + """ screen_y 180 ~visual>""") elif i < len(sentence) - 1: parser.productionstring(name="move eyes" + str(i), string=""" =g> isa reading state move_eyes position """ + str(i) + """ ?manual> preparation free ==> =imaginal> isa action_chunk WORD_NEXT0_LEX None =g> isa reading state reading_word position """ + str(i + 1) + """ tag """ + str(pos[i + 1]) + """ ?visual_location> attended False +visual_location> isa _visuallocation screen_x """ + str(pos_word + 7 + 7 * visual_effect(sentence[i + 1], visual)) + """ screen_y 180 ~visual>""") if prints: print(sentence) parser.goals["g"].add( actr.chunkstring(string=""" isa reading state reading_word position 0 tag """ + str(pos[0]))) parser.goals["imaginal"].add( actr.chunkstring(string=""" isa action_chunk TREE1_LABEL NOPOS TREE1_HEAD noword TREE2_LABEL xxx TREE2_HEAD xxx TREE3_LABEL xxx TREE3_HEAD xxx ANTECEDENT_CARRIED NO WORD_NEXT0_LEX """ + '"' + str(sentence[1]) + '"' + """ WORD_NEXT0_POS """ + str(pos[1]))) # start a dictionary that will collect all created structures, and a list of built constituents constituents = {} built_constituents = [(Tree("xxx", []), (None, "xxx")), (Tree("xxx", []), (None, "xxx")), (Tree("NOPOS", []), (None, "noword"))] final_tree = Tree("X", []) if prints: parser_sim = parser.simulation( realtime=False, gui=False, trace=True, environment_process=environment.environment_process, stimuli=stimuli, triggers='space', times=40) else: parser_sim = parser.simulation( realtime=False, gui=True, trace=False, environment_process=environment.environment_process, stimuli=stimuli, triggers='space', times=40) antecedent_carried = "NO" what_antecedent_carried = None eyemove_times = [] #reaction times per word reanalysis_list, words_list, activations_list, agreeing_actions_list, matching_fs_list, total_fan_list, actions_list = [], [], [], [], [], [], [] #collects total activation of rules, agreeing_actions... per sentence (used to find out what plays a role in syntactic parsing for RTs) wh_gaps_list = [] word_parsed = 0 last_time = 0 activations, agreeing_actions, matching_fs, total_fan = [], [], [], [ ] #collects total activation of rules, agreeing_actions... per word (used to find out what plays a role in syntactic parsing for RTs) retrieve_wh_reanalysis = None while True: try: parser_sim.step() #print(parser_sim.current_event) except simpy.core.EmptySchedule: eyemove_times = [ 10 for _ in sentence ] #if sth goes wrong, it's probably because it got stuck somewhere; in that case report time-out time per word (40 s) or nan break if parser_sim.show_time() > 60: eyemove_times = [ 10 for _ in sentence ] #this takes care of looping or excessive time spent - break if you loop (40 s should be definitely enough to move on) break if re.search("^SHIFT COMPLETE", str(parser_sim.current_event.action)): current_word_focused = pos_words.index( tuple(environment.current_focus)) extra_rule_time = parser.model_parameters[ "latency_factor"] * np.exp( -parser.model_parameters["latency_exponent"] * np.mean(activations) / 10) # two things play a role - number of matching features; fan of each matching feature; explore these two separately if len(eyemove_times) not in tobe_removed: eyemove_times.append(parser_sim.show_time() + extra_rule_time - last_time) else: tobe_removed.remove(len(eyemove_times)) for i in range(word_parsed + 1, current_word_focused): eyemove_times.append(0) #eyemove_times.append((parser_sim.show_time() + extra_rule_time - last_time)/(current_word_focused-word_parsed)) last_time = parser_sim.show_time() word_parsed = current_word_focused if word_parsed >= len(sentence): if len(eyemove_times) not in tobe_removed: eyemove_times.append(parser_sim.show_time() - last_time) break #this below implements carrying out an action if re.search("^RULE FIRED: recall action", parser_sim.current_event.action) or\ re.search("^RULE FIRED: move to last action", parser_sim.current_event.action): postulated_gaps, reduced_unary = 0, 0 postulate_gaps, reduce_unary = True, True parser_sim.steps(2) #exactly enough steps to make imaginal full if prints: print(parser.goals["imaginal"]) #add new word to the list of used words built_constituents.append( (Tree( str(parser.goals["imaginal"].copy().pop().TREE0_LABEL), (str(parser.goals["imaginal"].copy().pop().TREE0_HEAD), )), (None, str(parser.goals["imaginal"].copy().pop().TREE0_HEAD)))) built_constituents_reanalysis = built_constituents.copy() parser.goals["imaginal_reanalysis"].add( parser.goals["imaginal"].copy().pop()) recently_retrieved = set() #set retrieve_wh to None or, if the reanalysis already postulated a gap, to "yes" retrieve_wh = retrieve_wh_reanalysis retrieve_wh_reanalysis = None activations, agreeing_actions, matching_fs, total_fan = [], [], [], [ ] #collects total activation of rules, agreeing_actions... (used to find out what plays a role in syntactic parsing for RTs) #antecedent_carried temporarily updated in the blind analysis; we will record the original position, which is the non-temporary one, in antecedent_carried_origo and re-use it after the blind analysis; what_antecedent_carried - specifies the category of antecedent antecedent_carried_origo = antecedent_carried first_action = True if word_parsed not in tobe_removed: reanalysis_list.append("no") #by dft no reanalysis recorded #this loop for actual blind analysis while True: parser_retrievals, number_of_agreeing_actions, number_of_matching_fs, fan_size = ut.recall_action( blind_actions, parser.goals["imaginal"], parser.goals["word_info"], None, recently_retrieved, built_constituents, word_freq, label_freq, prints=False, strength_of_association=strength_of_association, postulate_gaps=postulate_gaps, reduce_unary=reduce_unary, blind={"WORD_NEXT0_LEX", "WORD_NEXT0_POS"}) # the activation for the first word comes only from the blind if word_parsed == 0: activations.append(parser_retrievals[0]) agreeing_actions.append(number_of_agreeing_actions) matching_fs.append(number_of_matching_fs) total_fan.append(fan_size) ut.collect_parse(parser_retrievals[1], built_constituents) tree0_label = built_constituents[-1][0].label() tree1_label = built_constituents[-2][0].label() tree2_label = built_constituents[-3][0].label() tree3_label = built_constituents[-4][0].label() children = { "".join(["tree", str(x)]): ["NOPOS", "NOPOS"] for x in range(4) } for x, subtree in enumerate(built_constituents[-1][0]): if isinstance(subtree, Tree) and subtree.label() != ut.EMPTY: children["tree0"][x] = subtree.label() if re.search("_BAR", children["tree0"][1]): if built_constituents[-1][0][1][1].label( ) == ut.EMPTY or re.search( "_BAR", built_constituents[-1][0][1][1].label()): children["tree0"][1] = built_constituents[-1][0][1][ 0].label() else: children["tree0"][1] = built_constituents[-1][0][1][ 1].label() for x, subtree in enumerate(built_constituents[-2][0]): if isinstance(subtree, Tree) and subtree.label() != ut.EMPTY: children["tree1"][x] = subtree.label() if re.search("_BAR", children["tree1"][1]): if built_constituents[-2][0][1][1].label( ) == ut.EMPTY or re.search( "_BAR", built_constituents[-2][0][1][1].label()): children["tree1"][1] = built_constituents[-2][0][1][ 0].label() else: children["tree1"][1] = built_constituents[-2][0][1][ 1].label() # block looping through reduce_unary (at most 2 reduce_unary allowed) if parser_retrievals[1] and parser_retrievals[1][ "action"] == 'reduce_unary': reduced_unary += 1 if reduced_unary == 2: reduce_unary = False reduced_unary = 0 else: reduced_unary = 0 reduce_unary = True if parser_retrievals[1] and parser_retrievals[1][ "action"] == 'postulate_gap': if antecedent_carried == "YES" and syntactic and re.search( "t", str(parser_retrievals[1]["action_result_label"] [0])): retrieve_wh = "yes" if re.search( "t", str(parser_retrievals[1]["action_result_label"] [0])): antecedent_carried = "NO" #at most 3 gaps allowed if postulated_gaps > 1: postulate_gaps = False postulated_gaps += 1 ci = parser.goals["imaginal"].pop() string = """ isa action_chunk WORD_NEXT0_LEX """ + '"' + str(ci.WORD_NEXT0_LEX) + '"' + """ WORD_NEXT0_POS '""" + str(ci.WORD_NEXT0_POS) + """' ANTECEDENT_CARRIED """ + antecedent_carried + """ TREE0_HEAD """ + '"' + str(parser_retrievals[1] ["action_result_label"][0]) + '"' + """ TREE0_LEFTCHILD """ + children["tree0"][0] + """ TREE0_RIGHTCHILD """ + children["tree0"][1] + """ TREE0_LABEL '-NONE-' TREE1_LEFTCHILD """ + children["tree1"][0] + """ TREE1_RIGHTCHILD """ + children["tree1"][1] + """ TREE0_HEADPOS """ + str(built_constituents[-1][1][0]) + """ TREE1_LABEL """ + '"' + tree1_label + '"' + """ TREE1_HEADPOS """ + str(built_constituents[-2][1][0]) + """ TREE1_HEAD """ + '"' + str(built_constituents[-2][1][1]) + '"' + """ TREE2_LABEL """ + '"' + tree2_label + '"' + """ TREE2_HEADPOS """ + str(built_constituents[-3][1][0]) + """ TREE2_HEAD """ + '"' + str(built_constituents[-3][1][1]) + '"' + """ TREE3_LABEL """ + '"' + tree3_label + '"' + """ TREE3_HEAD """ + '"' + str(built_constituents[-4][1][1]) + '"' + """ ACTION_PREV """ + str(parser_retrievals[1]["action"]) parser.goals["imaginal"].add( actr.chunkstring(string=string)) parser.goals["word_info"].add( actr.chunkstring(string=""" isa word form '""" + str(parser_retrievals[1] ["action_result_label"][0]) + """' cat '-NONE-'""")) elif parser_retrievals[1]: ci = parser.goals["imaginal"].pop() string = """ isa action_chunk WORD_NEXT0_LEX """ + '"' + str(ci.WORD_NEXT0_LEX) + '"' + """ WORD_NEXT0_POS '""" + str(ci.WORD_NEXT0_POS) + """' ANTECEDENT_CARRIED """ + antecedent_carried + """ TREE0_LABEL """ + '"' + str( built_constituents[-1][0].label()) + '"' + """ TREE0_HEADPOS """ + str(built_constituents[-1][1][0]) + """ TREE0_HEAD """ + '"' + str(built_constituents[-1][1][1]) + '"' + """ TREE0_LEFTCHILD """ + children["tree0"][0] + """ TREE0_RIGHTCHILD """ + children["tree0"][1] + """ TREE1_LABEL """ + '"' + tree1_label + '"' + """ TREE1_HEADPOS """ + str(built_constituents[-2][1][0]) + """ TREE1_HEAD """ + '"' + str(built_constituents[-2][1][1]) + '"' + """ TREE1_LEFTCHILD """ + children["tree1"][0] + """ TREE1_RIGHTCHILD """ + children["tree1"][1] + """ TREE2_LABEL """ + '"' + tree2_label + '"' + """ TREE2_HEADPOS """ + str(built_constituents[-3][1][0]) + """ TREE2_HEAD """ + '"' + str(built_constituents[-3][1][1]) + '"' + """ TREE3_LABEL """ + '"' + tree3_label + '"' + """ TREE3_HEAD """ + '"' + str(built_constituents[-4][1][1]) + '"' + """ ACTION_PREV """ + str(parser_retrievals[1]["action"]) parser.goals["imaginal"].add( actr.chunkstring(string=string)) else: break if parser_retrievals[1]["action"] == 'shift': #sometimes the parser would stop at BAR and shift; in reality, this is not possible since BARs are artificial categories if re.search("_BAR", built_constituents[-1][0].label()): built_constituents[-1][0].set_label( re.split("_BAR", built_constituents[-1][0].label())[0]) ci = parser.goals["imaginal"].pop() string = """ isa action_chunk TREE1_LABEL """ + '"' + tree0_label + '"' + """ TREE1_HEADPOS """ + str(built_constituents[-1][1][0]) + """ TREE1_HEAD """ + '"' + str(built_constituents[-1][1][1]) + '"' + """ TREE1_LEFTCHILD """ + children["tree0"][0] + """ TREE1_RIGHTCHILD """ + children["tree0"][1] + """ TREE2_LABEL """ + '"' + tree1_label + '"' + """ TREE2_HEADPOS """ + str(built_constituents[-2][1][0]) + """ TREE2_HEAD """ + '"' + str(built_constituents[-2][1][1]) + '"' + """ TREE3_LABEL """ + '"' + tree2_label + '"' + """ TREE3_HEAD """ + '"' + str(built_constituents[-3][1][1]) + '"' + """ ANTECEDENT_CARRIED """ + antecedent_carried + """ ACTION_PREV """ + str(parser_retrievals[1]["action"]) parser.goals["imaginal"].add( actr.chunkstring(string=string)) break postulated_gaps, reduced_unary = 0, 0 postulate_gaps, reduce_unary = True, True antecedent_carried = antecedent_carried_origo # the activation for the first word comes only from the blind if word_parsed == 0: activations_list.append(np.mean(activations) / 10) agreeing_actions_list.append(np.mean(agreeing_actions)) matching_fs_list.append(np.mean(matching_fs)) total_fan_list.append(np.mean(total_fan)) critical.pop(0) #this loop for potential reanalysis while True: if critical[0] != "no": try: critical_rule = critical_rules[critical[0]].pop(0) except KeyError: critical_rule = None parser_retrievals, number_of_agreeing_actions, number_of_matching_fs, fan_size = ut.recall_action( actions, parser.goals["imaginal_reanalysis"], parser.goals["word_info"], critical_rule, recently_retrieved, built_constituents_reanalysis, word_freq, label_freq, prints=False, strength_of_association=strength_of_association, number_retrieved=3, postulate_gaps=postulate_gaps, reduce_unary=reduce_unary, blind={}) else: parser_retrievals, number_of_agreeing_actions, number_of_matching_fs, fan_size = ut.recall_action( actions, parser.goals["imaginal_reanalysis"], parser.goals["word_info"], None, recently_retrieved, built_constituents_reanalysis, word_freq, label_freq, prints=False, strength_of_association=strength_of_association, number_retrieved=3, postulate_gaps=postulate_gaps, reduce_unary=reduce_unary, blind={}) activations.append(parser_retrievals[0]) agreeing_actions.append(number_of_agreeing_actions) matching_fs.append(number_of_matching_fs) total_fan.append(fan_size) if first_action: actions_list.append(str(parser_retrievals[1]["action"])) first_action = False ut.collect_parse(parser_retrievals[1], built_constituents_reanalysis) tree0_label = built_constituents_reanalysis[-1][0].label() tree1_label = built_constituents_reanalysis[-2][0].label() tree2_label = built_constituents_reanalysis[-3][0].label() tree3_label = built_constituents_reanalysis[-4][0].label() children = { "".join(["tree", str(x)]): ["NOPOS", "NOPOS"] for x in range(4) } for x, subtree in enumerate(built_constituents[-1][0]): if isinstance(subtree, Tree) and subtree.label() != ut.EMPTY: children["tree0"][x] = subtree.label() if re.search("_BAR", children["tree0"][1]): if built_constituents[-1][0][1][1].label( ) == ut.EMPTY or re.search( "_BAR", built_constituents[-1][0][1][1].label()): children["tree0"][1] = built_constituents[-1][0][1][ 0].label() else: children["tree0"][1] = built_constituents[-1][0][1][ 1].label() for x, subtree in enumerate(built_constituents[-2][0]): if isinstance(subtree, Tree) and subtree.label() != ut.EMPTY: children["tree1"][x] = subtree.label() if re.search("_BAR", children["tree1"][1]): if built_constituents[-2][0][1][1].label( ) == ut.EMPTY or re.search( "_BAR", built_constituents[-2][0][1][1].label()): children["tree1"][1] = built_constituents[-2][0][1][ 0].label() else: children["tree1"][1] = built_constituents[-2][0][1][ 1].label() if re.search("-TPC", tree0_label) or (re.search( "^W", tree0_label)): antecedent_carried = "YES" what_antecedent_carried = str(tree0_label) # block looping through reduce_unary (at most 2 reduce_unary allowed) if parser_retrievals[1] and parser_retrievals[1][ "action"] == 'reduce_unary': reduced_unary += 1 if reduced_unary == 2: reduce_unary = False reduced_unary = 0 else: reduced_unary = 0 reduce_unary = True if parser_retrievals[1] and parser_retrievals[1][ "action"] == 'postulate_gap': if antecedent_carried_origo == "YES" and syntactic and re.search( "t", str(parser_retrievals[1]["action_result_label"] [0])) and retrieve_wh != "yes": retrieve_wh_reanalysis = "yes" #record that based on the upcoming word info, trace should be postulated; only if the original structure did not postulate it if re.search( "t", str(parser_retrievals[1]["action_result_label"] [0])): antecedent_carried = "NO" #at most 3 gaps allowed if postulated_gaps > 1: postulate_gaps = False postulated_gaps += 1 ci = parser.goals["imaginal_reanalysis"].pop() parser.decmem.add(ci, time=parser_sim.show_time()) string = """ isa action_chunk WORD_NEXT0_LEX """ + '"' + str(ci.WORD_NEXT0_LEX) + '"' + """ WORD_NEXT0_POS """ + '"' + str(ci.WORD_NEXT0_POS) + '"' + """ ANTECEDENT_CARRIED """ + antecedent_carried + """ TREE0_HEAD """ + '"' + str( parser_retrievals[1]["action_result_label"][0] ) + '"' + """ TREE0_LABEL '-NONE-' TREE0_HEADPOS """ + str(built_constituents_reanalysis[-1][1][0]) + """ TREE0_LEFTCHILD """ + children["tree0"][0] + """ TREE0_RIGHTCHILD """ + children["tree0"][1] + """ TREE1_LABEL """ + '"' + tree1_label + '"' + """ TREE1_HEADPOS """ + str(built_constituents_reanalysis[-2][1][0]) + """ TREE1_HEAD """ + '"' + str( built_constituents_reanalysis[-2][1][1]) + '"' + """ TREE1_LEFTCHILD """ + children["tree1"][0] + """ TREE1_RIGHTCHILD """ + children["tree1"][1] + """ TREE2_LABEL """ + '"' + tree2_label + '"' + """ TREE2_HEADPOS """ + str(built_constituents_reanalysis[-3][1][0]) + """ TREE2_HEAD """ + '"' + str(built_constituents_reanalysis[-3][1] [1]) + '"' + """ TREE3_LABEL """ + '"' + tree3_label + '"' + """ TREE3_HEAD """ + '"' + str(built_constituents_reanalysis[-4][1] [1]) + '"' + """ ACTION_PREV """ + str(parser_retrievals[1]["action"]) parser.goals["imaginal_reanalysis"].add( actr.chunkstring(string=string)) parser.goals["word_info"].add( actr.chunkstring(string=""" isa word form '""" + str(parser_retrievals[1] ["action_result_label"][0]) + """' cat '-NONE-'""")) elif parser_retrievals[1]: ci = parser.goals["imaginal_reanalysis"].pop() parser.decmem.add(ci, time=parser_sim.show_time()) string = """ isa action_chunk WORD_NEXT0_LEX """ + '"' + str(ci.WORD_NEXT0_LEX) + '"' + """ WORD_NEXT0_POS """ + '"' + str(ci.WORD_NEXT0_POS) + '"' + """ ANTECEDENT_CARRIED """ + antecedent_carried + """ TREE0_LABEL """ + '"' + str(built_constituents_reanalysis[-1][0].label( )) + '"' + """ TREE0_HEADPOS """ + str(built_constituents_reanalysis[-1][1][0]) + """ TREE0_HEAD """ + '"' + str( built_constituents_reanalysis[-1][1][1]) + '"' + """ TREE0_LEFTCHILD """ + children["tree0"][0] + """ TREE0_RIGHTCHILD """ + children["tree0"][1] + """ TREE1_LABEL """ + '"' + tree1_label + '"' + """ TREE1_HEADPOS """ + str(built_constituents_reanalysis[-2][1][0]) + """ TREE1_HEAD """ + '"' + str(built_constituents_reanalysis[-2][1] [1]) + '"' + """ TREE1_LEFTCHILD """ + children["tree1"][0] + """ TREE1_RIGHTCHILD """ + children["tree1"][1] + """ TREE2_LABEL """ + '"' + tree2_label + '"' + """ TREE2_HEADPOS """ + str(built_constituents_reanalysis[-3][1][0]) + """ TREE2_HEAD """ + '"' + str(built_constituents_reanalysis[-3][1] [1]) + '"' + """ TREE3_LABEL """ + '"' + tree3_label + '"' + """ TREE3_HEAD """ + '"' + str(built_constituents_reanalysis[-4][1] [1]) + '"' + """ ACTION_PREV """ + str(parser_retrievals[1]["action"]) parser.goals["imaginal_reanalysis"].add( actr.chunkstring(string=string)) else: break if parser_retrievals[1]["action"] == 'shift': #sometimes the parser would stop at BAR and shift; in reality, this is not possible since BARs are artificial categories if re.search("_BAR", built_constituents_reanalysis[-1][0].label()): built_constituents_reanalysis[-1][0].set_label( re.split( "_BAR", built_constituents_reanalysis[-1] [0].label())[0]) ci = parser.goals["imaginal_reanalysis"].pop() parser.decmem.add(ci, time=parser_sim.show_time()) #built constituents have head info; if it is not present, use the info from imaginal_reanalysis (stores head info for terminal nodes) string = """ isa action_chunk TREE1_LABEL """ + '"' + tree0_label + '"' + """ TREE1_HEADPOS """ + str(built_constituents_reanalysis[-1][1][0]) + """ TREE1_HEAD """ + '"' + str( built_constituents_reanalysis[-1][1][1]) + '"' + """ TREE1_LEFTCHILD """ + children["tree0"][0] + """ TREE1_RIGHTCHILD """ + children["tree0"][1] + """ TREE2_LABEL """ + '"' + tree1_label + '"' + """ TREE2_HEADPOS """ + str(built_constituents_reanalysis[-2][1][0]) + """ TREE2_HEAD """ + '"' + str(built_constituents_reanalysis[-2][1] [1]) + '"' + """ TREE3_LABEL """ + '"' + tree2_label + '"' + """ TREE3_HEAD """ + '"' + str(built_constituents_reanalysis[-3][1] [1]) + '"' + """ ANTECEDENT_CARRIED """ + antecedent_carried + """ ACTION_PREV """ + str(parser_retrievals[1]["action"]) parser.goals["imaginal_reanalysis"].add( actr.chunkstring(string=string)) break cg = parser.goals["g"].pop() parser.goals["g"].add( actr.chunkstring(string=""" isa reading position """ + str(cg.position) + """ reanalysis None retrieve_wh """ + str(retrieve_wh) + """ what_retrieve """ + str( what_antecedent_carried) #used only for recall of category + """ state finished_recall""")) if built_constituents != built_constituents_reanalysis: if reanalysis and len(built_constituents) != len( built_constituents_reanalysis): #mark that the reanalysis should take place parser.goals["g"].add( actr.chunkstring(string=""" isa reading position """ + str(cg.position) + """ reanalysis yes retrieve_wh """ + str(retrieve_wh) + """ what_retrieve """ + str(what_antecedent_carried ) #used only for recall of category + """ state finished_recall""")) if word_parsed not in tobe_removed: reanalysis_list[-1] = "yes" if prints: original_tree = Tree( "X", next(zip(*built_constituents[3:]))) print("DRAWING TREE TO BE REANALYSED") print("********************************") original_tree.draw() built_constituents = built_constituents_reanalysis.copy() parser.goals["imaginal"].add( parser.goals["imaginal_reanalysis"].copy().pop()) final_tree = Tree("X", next(zip(*built_constituents[3:]))) if word_parsed not in tobe_removed: activations_list.append(np.mean(activations) / 10) wh_gaps_list.append(str(retrieve_wh)) agreeing_actions_list.append(np.mean(agreeing_actions)) matching_fs_list.append(np.mean(matching_fs)) total_fan_list.append(np.mean(total_fan)) words_list.append(sentence[word_parsed]) critical.pop(0) if prints: print("DRAWING TREE") print("********************************") # print(final_tree) # final_tree.pretty_print() final_tree.draw() return words_list, activations_list[: -1], wh_gaps_list, reanalysis_list, agreeing_actions_list[: -1], matching_fs_list[: -1], total_fan_list[: -1]
from nltk.tree import Tree vp = Tree('VP', [Tree('V', ['saw']), Tree('NP', ['him'])]) s = Tree('S', [Tree('NP', ['I']), vp]) print(s) dp1 = Tree('dp', [Tree('d', ['the']), Tree('np', ['dog'])]) dp2 = Tree('dp', [Tree('d', ['the']), Tree('np', ['cat'])]) vp = Tree('vp', [Tree('v', ['chased']), dp2]) dp1.draw() dp2.draw() vp.draw() tree = Tree('s', [dp1, vp]) print(tree) tree.draw() len(tree) print(tree.leaves()) tree.label() dp1.label()
def stanford_parser(dep_parser, sub_definition): Tree = list(dep_parser.raw_parse(sub_definition))[0] print Tree Tree.draw()
def draw_tree(tree): ''' Draws a tree... ''' Tree.draw(tree)
the_tree = Tree("sentence", tokenize.sent_tokenize(initial_sentence)) #the_tree.draw() print(the_tree) list_of_sentences = [] for i in tokenize.sent_tokenize(initial_sentence): list_of_sentences.append(tag.pos_tag(tokenize.word_tokenize(i))) print(list_of_sentences) x = Tree("ID:50", list_of_sentences) print(x) x.draw() ''' for index, value in enumerate(the_tree): print("index" + str(index)) print("value" + str(value)) print(tokenize.word_tokenize(the_tree[index])) the_tree = Tree(tokenize.word_tokenize(the_tree[index])) the_tree.draw() ''' ''' print(tokenize.sent_tokenize(initial_sentence)) print("FOR LOOP") for i in tokenize.sent_tokenize(initial_sentence):
for child in root: cur_str = '' if isinstance(child, Tree): cur_str = depth_travel(child) cur_str = child.label() + cur_str else: cur_str = '\"' + child + '\"' result_str += cur_str + "+" result_str = result_str[:-1] + ")" return result_str if __name__ == '__main__': #添加stanford环境变量,此处需要手动修改,jar包地址为绝对地址。 parser_path = r'D:\Workspace\Eclipse\Stanford-parser\lib\stanford-parser.jar' parser_model_path = r'D:\Workspace\Eclipse\Stanford-parser\lib\stanford-parser-2016.10.31-models.jar' parser_lex_model_path = r"D:\Workspace\Eclipse\Stanford-parser\model_en\lexparser\englishPCFG.ser.gz" #句法标注 # parser = stanford.StanfordParser(model_path=parser_lex_model_path, path_to_jar=parser_path, path_to_models_jar=parser_model_path) parser = stanford.StanfordParser(model_path=parser_lex_model_path, path_to_jar=parser_path, path_to_models_jar=parser_model_path) tree = Tree( 'ROOT', list( next( parser.raw_parse( "the quick brown fox jumps over the lazy dog")))) print(depth_travel(tree)) tree.draw()
def denpendency_stanford_dp(tokenized_sent, display_tree=False): result = list(stanford_dp.tagged_parse(tokenized_sent)) for item in result[0].triples(): if display_tree: Tree.draw(item) print item