def __conjugate(self):
    """Link every rectangle and each of its diagonals to its conjugate.

    The conjugate of rectangle (e1, e2) is looked up in ``self.rectangles``
    under the key (e2.conj, e1.conj); a missing entry raises ``KeyError``.
    The conjugate of a diagonal lives in the conjugate rectangle under the
    key (D - e1.len + e2.len, pathset), where ``pathset`` is the conjugated
    pathset when the pathsets filter is active and ``None`` otherwise.
    """
    for rectangle in self.rectangles.itervalues():
        mirror_key = (rectangle.e2.conj, rectangle.e1.conj)
        # conjugate() presumably sets the mutual .conj links; rectangle.conj
        # is read a few lines below -- TODO confirm against its definition.
        conjugate(rectangle, self.rectangles[mirror_key])
        for diagonal in rectangle.diagonals.itervalues():
            assert diagonal.rectangle == rectangle
            owner = diagonal.rectangle
            mirror_offset = diagonal.D - owner.e1.len + owner.e2.len
            if experimental.filter == experimental.Filter.pathsets:
                mirror_pathset = diagonal.pathset.conj()
            else:
                mirror_pathset = None
            mirror_diagonal = rectangle.conj.diagonals[mirror_offset, mirror_pathset]
            conjugate(diagonal, mirror_diagonal)
def add_edge(self, eid, v1id, v2id, edge_len, conj_id):
    """Return the edge with id ``eid``, creating and registering it if needed.

    An already registered edge is returned untouched. Otherwise
    ``self.max_eid`` is raised to cover both ``eid`` and ``conj_id``, a new
    ``Edge`` is built between the vertices ``self.vs[v1id]`` and
    ``self.vs[v2id]`` and stored in ``self.es``. The conjugate passed to the
    constructor is whatever ``self.es`` currently holds for ``conj_id``
    (``None`` if it has not been added yet). A self-conjugate edge
    (``eid == conj_id``) is explicitly conjugated with itself.
    """
    if eid in self.es:
        return self.es[eid]

    # Same effect as the conditional update: max_eid never decreases.
    self.max_eid = max(self.max_eid, eid, conj_id)

    partner = self.es.get(conj_id, None)
    edge = Edge(eid, self.vs[v1id], self.vs[v2id], edge_len, partner)
    self.es[eid] = edge
    if eid == conj_id:
        conjugate(edge, edge)
    return edge
def add_diagonal_and_conj(self, diag):
    """Add ``diag`` together with a freshly built conjugate diagonal.

    Nothing is done (and ``None`` is returned) when ``self.diagonals``
    already holds a diagonal with the same rectangle edges and the same
    offset ``D``. Otherwise a conjugate rectangle is created, the conjugate
    diagonal is added to it, both pairs are conjugated, and the result of
    ``self.add_diagonal(diag)`` is returned.
    """
    duplicate = any(
        diag.rectangle.e1 == known.rectangle.e1
        and diag.rectangle.e2 == known.rectangle.e2
        and diag.D == known.D
        for known in self.diagonals
    )
    if duplicate:
        return

    rectangle = diag.rectangle
    mirror_rectangle = Rectangle(rectangle.e2.conj, rectangle.e1.conj)
    conjugate(rectangle, mirror_rectangle)

    mirror_offset = diag.D - diag.rectangle.e1.len + diag.rectangle.e2.len
    if experimental.filter == experimental.Filter.pathsets:
        mirror_pathset = diag.pathset.conj()
    else:
        mirror_pathset = None

    mirror_rectangle.add_diagonal(self.d, mirror_offset, mirror_pathset)
    mirror_diagonal = rectangle.conj.diagonals[mirror_offset, mirror_pathset]
    conjugate(diag, mirror_diagonal)
    return self.add_diagonal(diag)
def __build_from_graph(self, config):
    """Populate ``self.rectangles`` with rectangles and diagonals from the graph.

    For every edge ``e1`` and every ``(e2, D)`` pair yielded by
    ``self.graph.dfs(e1, self.d)`` a rectangle (e1, e2) and its conjugate
    (e2.conj, e1.conj) are created on demand, a diagonal is added to both,
    and paired-read support from ``self.prd`` is accumulated on the
    diagonals. Diagonals whose support stays (practically) zero are removed
    again, and so are rectangles left without any diagonal.

    ``config`` is passed through unchanged to ``inc_prd_support``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed copy of this method -- confirm it against the
    original indentation.
    """
    for e1 in self.graph.es.itervalues():
        for e2, D in self.graph.dfs(e1, self.d):
            # Distinct edges are only considered when paired info exists.
            if e1.eid != e2.eid and (e1.eid, e2.eid) not in self.prd:
                continue
            if (e1, e2) not in self.rectangles:
                self.rectangles[(e1, e2)] = Rectangle(e1, e2)
            r = self.rectangles[(e1, e2)]
            if (e2.conj, e1.conj) not in self.rectangles:
                self.rectangles[(e2.conj, e1.conj)] = Rectangle(e2.conj, e1.conj)
            r_conj = self.rectangles[(e2.conj, e1.conj)]
            conjugate(r, r_conj)
            r.add_diagonal(self.d, D)
            # Offset of the same diagonal seen from the conjugate rectangle.
            D_conj = D - e1.len + e2.len
            r_conj.add_diagonal(self.d, D_conj)
            diag = r.diagonals[(D, None)]
            diag_conj = r_conj.diagonals[(D_conj, None)]
            conjugate(diag, diag_conj)
            # Support from paired info recorded for (e1, e2); mirrored onto
            # the conjugate diagonal unless the diagonal is self-conjugate.
            if (e1.eid, e2.eid) in self.prd:
                for (D1, weight, delta) in self.prd[(e1.eid, e2.eid)]:
                    diag.inc_prd_support(D1, weight, delta, config)
                    if diag != diag.conj:
                        diag.conj.inc_prd_support(D1 - e1.len + e2.len, weight, delta, config)
            # Support from paired info recorded for the conjugate pair;
            # mirrored back onto the direct diagonal.
            if (e2.conj.eid, e1.conj.eid) in self.prd:
                for (D1, weight, delta) in self.prd[(e2.conj.eid, e1.conj.eid)]:
                    diag_conj.inc_prd_support(D1, weight, delta, config)
                    if diag != diag.conj:
                        diag.inc_prd_support(D1 - e2.conj.len + e1.conj.len, weight, delta, config)
            # Drop diagonals with (numerically) zero support, then any
            # rectangle pair that is left without diagonals.
            if diag.support() < 0.0000000001:
                if (D, None) in r.diagonals:
                    del r.diagonals[(D, None)]
                if (D_conj, None) in r_conj.diagonals:
                    del r_conj.diagonals[(D_conj, None)]
                if len(r.diagonals.keys()) == 0:
                    if (e1, e2) in self.rectangles:
                        del self.rectangles[(e1, e2)]
                    if (e2.conj, e1.conj) in self.rectangles:
                        del self.rectangles[(e2.conj, e1.conj)]
def add_diagonal(self, diag):
    """Register ``diag`` and its conjugate as a pair of conjugate bi-edges.

    Returns ``None`` without doing anything when ``diag`` is already in
    ``self.diagonals``. Otherwise a bi-edge is created for ``diag`` and,
    unless the diagonal is its own conjugate, a second one for
    ``diag.conj``; the edges and their end vertices are conjugated
    crosswise and the tuple ``(edge, conjugate_edge)`` is returned.
    """
    if diag in self.diagonals:
        return

    forward = self.__add_bedge(diag)
    if diag.conj != diag:
        backward = self.__add_bedge(diag.conj)
    else:
        # Self-conjugate diagonal: one bi-edge plays both roles.
        backward = forward

    self.diagonals.add(diag)
    self.diagonals.add(diag.conj)

    conjugate(forward, backward)
    # The conjugate edge runs the opposite way, so start pairs with end.
    conjugate(forward.v1, backward.v2)
    conjugate(forward.v2, backward.v1)
    return (forward, backward)
def add_diagonal(self, diag):
    """Register ``diag`` and its conjugate as a pair of conjugate bi-edges.

    An already known diagonal is ignored and ``None`` is returned.
    Otherwise one bi-edge is built per diagonal (a self-conjugate diagonal
    shares a single bi-edge), both diagonals are recorded in
    ``self.diagonals``, the edges and their end vertices are conjugated
    crosswise, and ``(edge, conjugate_edge)`` is returned.
    """
    if diag in self.diagonals:
        return

    direct_edge = self.__add_bedge(diag)
    mirror_edge = self.__add_bedge(diag.conj) if diag.conj != diag else direct_edge

    for known in (diag, diag.conj):
        self.diagonals.add(known)

    conjugate(direct_edge, mirror_edge)
    # Conjugation reverses direction: v1 of one edge mirrors v2 of the other.
    conjugate(direct_edge.v1, mirror_edge.v2)
    conjugate(direct_edge.v2, mirror_edge.v1)
    return (direct_edge, mirror_edge)
# D1 N1 (Aux) also V any/some N2. # Any/Some N1 (Aux) also V D2 N2. try: N_Prepend = choice(all_singular_animate_nouns) all_prependers = [ "According to the %s," % (N_Prepend[0]), "In the %s\'s opinion," % (N_Prepend[0]), "From what the %s heard," % (N_Prepend[0]), "As the %s knows," % (N_Prepend[0]), "Just as the %s said," % (N_Prepend[0]) ] Prepend = choice(all_prependers) N1 = choice(all_non_singular_animate_nouns, [N_Prepend]) D1 = choice(get_matched_by(N1, "arg_1", all_common_dets)) V = choice(get_matched_by(N1, "arg_1", all_transitive_verbs)) V = conjugate(V, N1, allow_negated=False) N2 = choice(get_matches_of(V, "arg_2", all_non_singular_nouns), [N1, N_Prepend]) D2 = choice(get_matched_by(N2, "arg_1", all_common_dets)) any_decoy_N2 = choice(get_matched_by(N2, "arg_1", any_decoys)) any_decoy_N1 = choice(get_matched_by(N1, "arg_1", any_decoys)) except IndexError: continue # sentence templates # Only D1 N1 (Aux) V any/some N2. # Any/Some N1 (Aux) only V D2 N2. # D1 N1 (Aux) also V any/some N2. # Any/Some N1 (Aux) also V D2 N2. sentence_1 = "%s only %s %s %s any %s." % (Prepend, D1[0], N1[0], V[0],
def __join_biedges(self, be1, be2):
    """Condense two consecutive bi-edges, and their conjugates, into one.

    Before::

        u ---be1---> v ---be2---> w
        z <--be4---- y <--be3---- x

    After::

        u --------beA--------> w
        z <-------beB--------- x

    where ``be3 = be2.conj`` and ``be4 = be1.conj``. The inner vertices
    ``v`` and ``y`` must each have exactly one incoming and one outgoing
    edge; they are detached and removed from ``self.vs``. The joined edges
    replace the originals in ``self.es``.

    Three layouts are handled:

    * ``be2 == be3`` (loop on the right): be1, be2, be4 become one
      self-conjugate edge u -> z.
    * ``be1 == be4`` (loop on the left): be3, be1, be2 become one
      self-conjugate edge x -> w.
    * otherwise: two conjugate edges u -> w and x -> z.

    The doubly self-conjugate layout (``be1 == be4 and be2 == be3``) is not
    supported: it trips ``assert False`` (or returns without changes when
    assertions are disabled).

    When ``self.test_utils`` is set, each junction is scored through
    ``should_join`` and tallied in ``join_correct`` / ``join_incorrect``.
    The scoring is skipped in every layout when ``self.test_utils`` is
    falsy; the loop layouts used to call it unconditionally and failed
    without test utilities.
    """
    be3 = be2.conj
    be4 = be1.conj
    u, v, w = be1.v1, be1.v2, be2.v2
    x, y, z = be3.v1, be3.v2, be4.v2
    assert be1.v2 == be2.v1
    assert 1 == len(v.inn) == len(v.out) == len(y.inn) == len(y.out), (
        be1.eid, be2.eid, len(v.inn), len(v.out), len(y.inn), len(y.out))
    assert be1 != be3, "=> (v == y) => (in-degree(v) > 1)"
    assert be2 != be4, "=> (v == y) => (out-degree(v) > 1)"

    def tally_joins(*junctions):
        # Score every (left, right) junction; no-op without test utilities.
        if not self.test_utils:
            return
        # Evaluate all junctions first, then update the counters, matching
        # the original evaluation order.
        verdicts = [
            self.test_utils.should_join(left.diagonals[-1], right.diagonals[0])
            for left, right in junctions
        ]
        for verdict in verdicts:
            if verdict:
                self.test_utils.join_correct += 1
            else:
                self.test_utils.join_incorrect += 1

    if be1 == be4 and be2 == be3:
        assert z == v == x
        assert u == y == w
        assert False
        return  # TODO: think how to condense better, rare case

    if be2 == be3:  # loop on the right: be1->be2=be3->be4
        assert v == x
        assert y == w
        beA = BEdge(u, z, None)
        beA.diagonals = be1.diagonals + be2.diagonals + be4.diagonals
        tally_joins((be1, be2), (be2, be4))
        conjugate(beA, beA)
        self.es[beA.eid] = beA
        u.out.remove(be1)
        w.inn.remove(be2)
        z.inn.remove(be4)
        del self.es[be1.eid]
        del self.es[be2.eid]
        del self.es[be4.eid]
    elif be1 == be4:  # loop on the left: be3->be1=be4->be2
        assert u == y
        assert z == v
        beA = BEdge(x, w, None)
        beA.diagonals = be3.diagonals + be1.diagonals + be2.diagonals
        tally_joins((be3, be1), (be1, be2))
        conjugate(beA, beA)
        self.es[beA.eid] = beA
        u.out.remove(be1)
        w.inn.remove(be2)
        x.out.remove(be3)
        del self.es[be1.eid]
        del self.es[be2.eid]
        del self.es[be3.eid]
    else:  # most usual case
        assert len({be1, be2, be3, be4}) == 4, (be1, be2, be3, be4)  # all different
        if u == w:
            assert z == x
            assert len({u, v, w, x, y, z}) == 4, (u, v, w, x, y, z)  # same ends, ok
        elif u == x:
            assert z == w
            assert len({u, v, w, x, y, z}) == 4, (u, v, w, x, y, z)  # conjugated ends, ok
        else:
            assert len({u, v, w, x, y, z}) == 6, (u, v, w, x, y, z)  # all different
        # TODO: check (x == u and w == z)
        beA = BEdge(u, w, None)
        beA.diagonals = be1.diagonals + be2.diagonals
        tally_joins((be1, be2), (be3, be4))
        beB = BEdge(x, z, None)
        beB.diagonals = be3.diagonals + be4.diagonals
        conjugate(beA, beB)
        self.es[beA.eid] = beA
        self.es[beB.eid] = beB
        u.out.remove(be1)
        w.inn.remove(be2)
        x.out.remove(be3)
        z.inn.remove(be4)
        del self.es[be1.eid]
        del self.es[be2.eid]
        del self.es[be3.eid]
        del self.es[be4.eid]

    # The inner vertices are no longer attached to anything: detach and drop.
    v.inn, v.out = [], []
    y.inn, y.out = [], []
    self.vs.pop(v.key)
    self.vs.pop(y.key)
def sample_nested_rc_2_rcs(self):
    """Sample one pair of sentence templates with nested relative clauses.

    Returns a ``(data, track_sentence, Ds)`` triple:

    * ``data`` -- two space-joined sentence templates. Determiner slots are
      left as literal ``"%s"`` placeholders. The first template uses
      ``V1_ing`` as the main verb; the second uses ``V1`` and puts exactly
      one ``*_ing`` verb form inside a relative clause.
    * ``track_sentence`` -- two identical ``(S1, RC1, RC2)`` tuples.
    * ``Ds`` -- for each template, the tuple of determiners meant to fill
      its ``"%s"`` slots, in order (presumably; depends on the templates
      returned by the relative-clause helpers -- verify).

    The order of the random draws below determines the sample, so
    statements must not be reordered.
    """
    V1 = choice(self.all_non_ing_transitive_verbs)
    V1_ing = self.get_ing_form(V1)
    NP1 = choice(get_matches_of(V1, "arg_1", self.safe_nouns))
    V1 = conjugate(V1, NP1)
    # Best effort: if the -ing form cannot be conjugated it is used as is.
    try:
        V1_ing = conjugate(V1_ing, NP1)
    except Exception:
        pass
    D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
    NP2 = choice(get_matches_of(V1, "arg_2", self.safe_nouns))
    D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
    S1 = " ".join([D1[0], NP1[0], "%s", D2[0], NP2[0]])

    # Outer/inner relative clause on NP1: subject+subject, object+subject
    # or subject+object.
    option = random.randint(0, 2)
    if option == 0:
        RC1, arg_RC1, V_RC1, V_RC1_ing, D_RC1 = self.subject_relative_clause(
            NP1, bind=True)
        RC1_b, _, V_RC1_b, V_RC1_ing_b, D_RC1_b = self.subject_relative_clause(
            arg_RC1, bind=False)
    elif option == 1:
        RC1, arg_RC1, V_RC1, V_RC1_ing, D_RC1 = self.object_relative_clause(
            NP1, bind=True)
        RC1_b, _, V_RC1_b, V_RC1_ing_b, D_RC1_b = self.subject_relative_clause(
            arg_RC1, bind=False)
    else:
        RC1, arg_RC1, V_RC1, V_RC1_ing, D_RC1 = self.subject_relative_clause(
            NP1, bind=True)
        RC1_b, _, V_RC1_b, V_RC1_ing_b, D_RC1_b = self.object_relative_clause(
            arg_RC1, bind=False)

    # Same three combinations for the relative clause on NP2.
    option = random.randint(0, 2)
    if option == 0:
        RC2, arg_RC2, V_RC2, V_RC2_ing, D_RC2 = self.subject_relative_clause(
            NP2, bind=True)
        RC2_b, _, V_RC2_b, V_RC2_ing_b, D_RC2_b = self.subject_relative_clause(
            arg_RC2, bind=False)
    elif option == 1:
        RC2, arg_RC2, V_RC2, V_RC2_ing, D_RC2 = self.object_relative_clause(
            NP2, bind=True)
        RC2_b, _, V_RC2_b, V_RC2_ing_b, D_RC2_b = self.subject_relative_clause(
            arg_RC2, bind=False)
    else:
        RC2, arg_RC2, V_RC2, V_RC2_ing, D_RC2 = self.subject_relative_clause(
            NP2, bind=True)
        RC2_b, _, V_RC2_b, V_RC2_ing_b, D_RC2_b = self.object_relative_clause(
            arg_RC2, bind=False)

    RC1_iv, V_RC1_iv, V_RC1_iv_ing = self.subject_relative_clause_intransitive(
        NP1)
    RC2_iv, V_RC2_iv, V_RC2_iv_ing = self.subject_relative_clause_intransitive(
        NP2)
    track_sentence = [(S1, RC1, RC2), (S1, RC1, RC2)]
    data = []
    Ds = []

    # First template: main verb is V1_ing; the nested clause sits on NP1
    # (option 0) or on NP2 (option 1), the other noun gets the
    # intransitive clause.
    option = random.randint(0, 1)
    if option == 0:
        data.append(" ".join([
            "%s", NP1[0],
            RC1.format(v=V_RC1, d="%s", rc=(RC1_b.format(v=V_RC1_b, d="%s"))),
            V1_ing[0], "%s", NP2[0], RC2_iv % V_RC2_iv
        ]))
        Ds.append((D1[0], D_RC1, D_RC1_b, D2[0]))
    else:
        data.append(" ".join([
            "%s", NP1[0], RC1_iv % V_RC1_iv, V1_ing[0], "%s", NP2[0],
            RC2.format(v=V_RC2, d="%s", rc=(RC2_b.format(v=V_RC2_b, d="%s")))
        ]))
        Ds.append((D1[0], D2[0], D_RC2, D_RC2_b))

    # Second template: main verb is V1; each option moves the -ing form to
    # a different relative-clause verb.
    option = random.randint(0, 5)
    if option == 0:
        data.append(" ".join([
            "%s", NP1[0],
            RC1.format(v=V_RC1_ing, d="%s", rc=(RC1_b.format(v=V_RC1_b, d="%s"))),
            V1[0], "%s", NP2[0]
        ]))
        Ds.append((D1[0], D_RC1, D_RC1_b, D2[0]))
    elif option == 1:
        data.append(" ".join([
            "%s", NP1[0],
            RC1.format(v=V_RC1, d="%s", rc=(RC1_b.format(v=V_RC1_ing_b, d="%s"))),
            V1[0], "%s", NP2[0]
        ]))
        Ds.append((D1[0], D_RC1, D_RC1_b, D2[0]))
    elif option == 2:
        data.append(" ".join([
            "%s", NP1[0],
            RC1.format(v=V_RC1, d="%s", rc=(RC1_b.format(v=V_RC1_b, d="%s"))),
            V1[0], "%s", NP2[0], RC2_iv % V_RC2_iv_ing
        ]))
        Ds.append((D1[0], D_RC1, D_RC1_b, D2[0]))
    elif option == 3:
        data.append(" ".join([
            "%s", NP1[0], V1[0], "%s", NP2[0],
            RC2.format(v=V_RC2_ing, d="%s", rc=(RC2_b.format(v=V_RC2_b, d="%s")))
        ]))
        Ds.append((D1[0], D2[0], D_RC2, D_RC2_b))
    elif option == 4:
        data.append(" ".join([
            "%s", NP1[0], V1[0], "%s", NP2[0],
            RC2.format(v=V_RC2, d="%s", rc=(RC2_b.format(v=V_RC2_ing_b, d="%s")))
        ]))
        Ds.append((D1[0], D2[0], D_RC2, D_RC2_b))
    else:
        data.append(" ".join([
            "%s", NP1[0], RC1_iv % V_RC1_iv_ing, V1[0], "%s", NP2[0],
            RC2.format(v=V_RC2, d="%s", rc=(RC2_b.format(v=V_RC2_b, d="%s")))
        ]))
        Ds.append((D1[0], D2[0], D_RC2, D_RC2_b))
    return data, track_sentence, Ds
N3 = N_to_DP_mutate(choice(get_matched_by(V2, "arg_2", get_all_conjunctive([("animate", N1["animate"]), ("sg", N1["sg"])], all_nouns)))) if N3["sg"] == "0": # if N3 is plural, then V1 must not be bare try: V1 = choice(get_matched_by(N3, "arg_1", all_non_bare_verbs)) except IndexError: print(V2[0], N1[0], N3[0]) continue else: V1 = choice(get_matched_by(N3, "arg_1", all_safe_verbs)) V1 = conjugate(V1, N1) N2 = N_to_DP_mutate(choice(get_matches_of(V1, "arg_2", all_nouns))) Rel = choice(get_matched_by(N1, "arg_1", get_all("category_2", "rel"))) subject_agree_auxiliaries = get_matched_by(N1, "arg_1", all_auxiliaries) for Aux in subject_agree_auxiliaries: acceptability = 1 if is_match_disj(V2, Aux["arg_2"]) else 0 sentence_1 = "%s %s %s %s %s %s %s?" % (Aux[0], N1[0], Rel[0], V1[0], N2[0], V2[0], N3[0]) sentence_2 = "%s %s %s %s %s %s %s?" % (Aux[0], N1[0], V2[0], N3[0], Rel[0], V1[0], N2[0]) sentence_1 = string_beautify(sentence_1)
("category_2", "D")]), get_all("expression", "this"), get_all("expression", "these"), get_all("expression", "those"))) # sample sentences until desired number while len(sentences) < number_to_generate: # sentence template # D1 N1 who V1 any/the/D2 N2 V2 any/the/D3 N3 # every boy who bought any/the/some apples sang any/the/a song # build all lexical items #TODO: throw in modifiers try: N1 = choice(all_animate_nouns) D1_up = choice(get_matched_by(N1, "arg_1", all_UE_UE_quantifiers)) D1_down = choice(get_matched_by(N1, "arg_1", all_DE_UE_quantifiers)) V1 = choice(get_matched_by(N1, "arg_1", all_transitive_verbs)) V1 = conjugate(V1, N1, allow_negated=False) N2 = choice(get_matches_of(V1, "arg_2", all_non_singular_nouns), [N1]) D2 = choice( get_matched_by(N2, "arg_1", all_UE_UE_quantifiers), [D1_up, D1_down] ) # restrict to UE quantifiers, otherwise there could be another licensor V2 = choice(get_matched_by(N1, "arg_1", all_transitive_verbs), [V1]) V2 = conjugate(V2, N1, allow_negated=False) N3 = choice(get_matches_of(V2, "arg_2", all_non_singular_nouns), [N1, N2]) D3 = choice(get_matched_by(N3, "arg_1", all_UE_UE_quantifiers), [D1_up, D1_down]) any_decoy_N2 = choice(get_matched_by(N2, "arg_1", any_decoys)) any_decoy_N3 = choice(get_matched_by(N3, "arg_1", any_decoys)) except IndexError: print(N1[0], N2[0], V2[0])
def sample_CP_noun_RC(self):
    """Sample one pair of templates: CP-taking noun plus a relative clause.

    The sentence frame is "D NP1 that D NP1_emb V_emb D NP2_emb V1 D NP2"
    with one intransitive subject relative clause attached to NP1_emb,
    NP2_emb or NP2.

    Returns a ``(data, track_sentence, Ds)`` triple:

    * ``data`` -- two space-joined templates whose determiner slots are
      literal ``"%s"`` placeholders. The first template uses ``V1_ing`` as
      the main verb; the second uses ``V1`` and places a single ``*_ing``
      form on the embedded verb or on the relative-clause verb.
    * ``track_sentence`` -- a two-element list holding ``S1`` twice.
    * ``Ds`` -- the same ``(D1, D1_emb, D2_emb, D2)`` determiner tuple,
      once per template.

    The order of the random draws below determines the sample, so
    statements must not be reordered.
    """
    NP1 = choice(self.CP_nouns)
    V1 = choice(
        get_matched_by(NP1, "arg_1", self.all_non_ing_transitive_verbs))
    V1_ing = self.get_ing_form(V1)
    V1 = conjugate(V1, NP1)
    V1_ing = conjugate(V1_ing, NP1)
    D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
    NP2 = choice(get_matches_of(V1, "arg_2", self.safe_nouns))
    D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))

    # Embedded clause: subject NP1_emb, verb V_emb, object NP2_emb.
    V_emb = choice(self.all_non_ing_transitive_verbs)
    V_emb_ing = self.get_ing_form(V_emb)
    NP1_emb = choice(get_matches_of(V_emb, "arg_1", self.safe_nouns))
    V_emb = conjugate(V_emb, NP1_emb)
    V_emb_ing = conjugate(V_emb_ing, NP1_emb)
    D1_emb = choice(get_matched_by(NP1_emb, "arg_1", self.safe_dets))
    NP2_emb = choice(get_matches_of(V_emb, "arg_2", self.safe_nouns))
    D2_emb = choice(get_matched_by(NP2_emb, "arg_1", self.safe_dets))

    RC2, V_RC2, V_RC2_ing = self.subject_relative_clause_intransitive(NP2)
    RC1_emb, V_RC1_emb, V_RC1_emb_ing = self.subject_relative_clause_intransitive(
        NP1_emb)
    RC2_emb, V_RC2_emb, V_RC2_emb_ing = self.subject_relative_clause_intransitive(
        NP2_emb)
    S1 = " ".join([
        D1[0], NP1[0], NP1_emb[0], V_emb[0], NP2_emb[0], V1[0], D2[0], NP2[0]
    ])
    # NOTE(review): (S1) is just S1, not a 1-tuple; sibling samplers store
    # tuples here -- confirm whether (S1,) was intended.
    track_sentence = [(S1), (S1)]
    data = []
    Ds = []

    # First template: main verb is V1_ing; the option picks which noun
    # carries the relative clause (NP1_emb, NP2_emb or NP2).
    option = random.randint(0, 2)
    if option == 0:
        data.append(" ".join([
            "%s", NP1[0], "that", "%s", NP1_emb[0], RC1_emb % V_RC1_emb,
            V_emb[0], "%s", NP2_emb[0], V1_ing[0], "%s", NP2[0]
        ]))
    elif option == 1:
        data.append(" ".join([
            "%s", NP1[0], "that", "%s", NP1_emb[0], V_emb[0], "%s",
            NP2_emb[0], RC2_emb % V_RC2_emb, V1_ing[0], "%s", NP2[0]
        ]))
    else:
        data.append(" ".join([
            "%s", NP1[0], "that", "%s", NP1_emb[0], V_emb[0], "%s",
            NP2_emb[0], V1_ing[0], "%s", NP2[0], RC2 % V_RC2
        ]))

    # Second template: main verb is V1. Options 0-2 put the -ing form on
    # the embedded verb, options 3-5 on the relative-clause verb.
    option = random.randint(0, 5)
    if option == 0:
        data.append(" ".join([
            "%s", NP1[0], "that", "%s", NP1_emb[0], RC1_emb % V_RC1_emb,
            V_emb_ing[0], "%s", NP2_emb[0], V1[0], "%s", NP2[0]
        ]))
    elif option == 1:
        data.append(" ".join([
            "%s", NP1[0], "that", "%s", NP1_emb[0], V_emb_ing[0], "%s",
            NP2_emb[0], RC2_emb % V_RC2_emb, V1[0], "%s", NP2[0]
        ]))
    elif option == 2:
        data.append(" ".join([
            "%s", NP1[0], "that", "%s", NP1_emb[0], V_emb_ing[0], "%s",
            NP2_emb[0], V1[0], "%s", NP2[0], RC2 % V_RC2
        ]))
    elif option == 3:
        data.append(" ".join([
            "%s", NP1[0], "that", "%s", NP1_emb[0],
            RC1_emb % V_RC1_emb_ing, V_emb[0], "%s", NP2_emb[0], V1[0],
            "%s", NP2[0]
        ]))
    elif option == 4:
        data.append(" ".join([
            "%s", NP1[0], "that", "%s", NP1_emb[0], V_emb[0], "%s",
            NP2_emb[0], RC2_emb % V_RC2_emb_ing, V1[0], "%s", NP2[0]
        ]))
    else:
        data.append(" ".join([
            "%s", NP1[0], "that", "%s", NP1_emb[0], V_emb[0], "%s",
            NP2_emb[0], V1[0], "%s", NP2[0], RC2 % V_RC2_ing
        ]))
    # Both templates share the same four determiner slots.
    Ds.append((D1[0], D1_emb[0], D2_emb[0], D2[0]))
    Ds.append((D1[0], D1_emb[0], D2_emb[0], D2[0]))
    return data, track_sentence, Ds
def __init__(self, vid, conj):
    """Create a vertex with id ``vid`` and pair it with ``conj``.

    ``vid`` is handed to ``Abstract_Vertex.__init__``; ``conj`` is the
    vertex passed to ``conjugate`` together with the new one (presumably
    its conjugate counterpart -- see ``conjugate`` for the accepted values).
    """
    Abstract_Vertex.__init__(self, vid)
    conjugate(self, conj)
def __init__(self, eid, v1, v2, edge_len, conj):
    """Create an edge ``eid`` from ``v1`` to ``v2`` of length ``edge_len``.

    The edge is paired with ``conj`` through ``conjugate``. It starts
    without a sequence (``seq`` is ``None``) and with ``cvr`` set to 0.
    """
    Abstract_Edge.__init__(self, v1, v2, eid)
    self.len = edge_len
    conjugate(self, conj)
    # Filled in later by whoever loads sequence / coverage data.
    self.seq, self.cvr = None, 0
# sample sentences until desired number while len(sentences) < number_to_generate: # sentence template # Connector D1 N1 V_mix D2 N2 V_intrans # While the lady dressed the baby cried. # build all lexical items #TODO: throw in modifiers Conx = choice(all_connectors) N1 = choice(all_animate_nouns) D1 = choice(get_matched_by(N1, "arg_1", all_common_dets)) V_mix = choice(get_matched_by(N1, "arg_1", all_transitive_verbs)) V_trans = choice(get_matched_by(N1, "arg_1", all_transitive_verbs)) V_intrans = choice(get_matched_by(N1, "arg_1", all_intransitive_verbs)) conjugate(V_mix, N1, allow_negated=False) N2 = choice(all_animate_nouns) D2 = choice(get_matched_by(N2, "arg_1", all_common_dets)) V_final = choice(get_matched_by(N2, "arg_1", all_intransitive_verbs)) conjugate(V_intrans, N2, allow_negated=True) # build classic garden paths sentence_1 = "%s %s %s %s %s %s %s ." % (Conx, D1[0], N1[0], V_mix[0], D2[0], N2[0], V_final[0]) sentence_2 = "%s %s %s %s %s %s %s ." % (Conx, D1[0], N1[0], V_intrans[0], D2[0], N2[0], V_final[0]) sentence_3 = "%s %s %s %s %s %s %s ." % (Conx, D1[0], N1[0], V_trans[0], D2[0], N2[0], V_final[0]) # flip the order of clauses sentence_4 = "%s %s %s %s %s %s %s ." % (D2[0], N2[0], V_final[0], Conx,
def sample_CP_verb_RC(self):
    """Sample one pair of sentences: CP-taking verb plus a relative clause.

    The sentence frame is "D1 NP1 V1 that D2 NP2 V2 D3 NP3" with one
    relative clause attached to NP1, NP2 or NP3.

    Returns a ``(data, track_sentence)`` pair:

    * ``data`` -- two space-joined sentences. The first uses ``V1_ing`` as
      the matrix verb; the second uses ``V1`` and places a single
      ``*_ing`` form on the relative-clause verb or on ``V2``.
    * ``track_sentence`` -- two identical ``(S1, RC1, RC2, RC3)`` tuples.

    The order of the random draws below determines the sample, so
    statements must not be reordered.
    """
    V1 = choice(self.CP_verbs_non_ing)
    V1_ing = self.get_ing_form(V1)
    NP1 = choice(get_matches_of(V1, "arg_1", self.safe_nouns))
    D1 = choice(get_matched_by(NP1, "arg_1", all_very_common_dets))
    V1 = conjugate(V1, NP1)
    V1_ing = conjugate(V1_ing, NP1)

    # Embedded clause: subject NP2, verb V2, object NP3.
    V2 = choice(self.all_non_ing_transitive_verbs)
    V2_ing = self.get_ing_form(V2)
    NP2 = choice(get_matches_of(V2, "arg_1", self.safe_nouns))
    V2 = conjugate(V2, NP2)
    V2_ing = conjugate(V2_ing, NP2)
    D2 = choice(get_matched_by(NP2, "arg_1", all_very_common_dets))
    NP3 = choice(get_matches_of(V2, "arg_2", self.safe_nouns))
    D3 = choice(get_matched_by(NP3, "arg_1", all_very_common_dets))

    # For each noun, flip a coin between a subject and an object relative
    # clause.
    if bool(random.randint(0, 1)):
        RC1, _, V_RC1, V_RC1_ing = self.subject_relative_clause(NP1)
    else:
        RC1, _, V_RC1, V_RC1_ing = self.object_relative_clause(NP1)
    if bool(random.randint(0, 1)):
        RC2, _, V_RC2, V_RC2_ing = self.subject_relative_clause(NP2)
    else:
        RC2, _, V_RC2, V_RC2_ing = self.object_relative_clause(NP2)
    if bool(random.randint(0, 1)):
        RC3, _, V_RC3, V_RC3_ing = self.subject_relative_clause(NP3)
    else:
        RC3, _, V_RC3, V_RC3_ing = self.object_relative_clause(NP3)

    S1 = " ".join([D1[0], "%s", NP1[0], "%s", V1[0], "that", D2[0], "%s", NP2[0], V2[0], D3[0], "%s", NP3[0]])
    track_sentence = [
        (S1, RC1, RC2, RC3),
        (S1, RC1, RC2, RC3)
    ]
    data = []

    # First sentence: matrix verb is V1_ing; the option picks the noun that
    # carries the relative clause.
    option = random.randint(0, 2)
    if option == 0:
        data.append(" ".join([D1[0], NP1[0], RC1 % V_RC1, V1_ing[0], "that", D2[0], NP2[0], V2[0], D3[0], NP3[0]]))
    elif option == 1:
        data.append(" ".join([D1[0], NP1[0], V1_ing[0], "that", D2[0], NP2[0], RC2 % V_RC2, V2[0], D3[0], NP3[0]]))
    else:
        data.append(" ".join([D1[0], NP1[0], V1_ing[0], "that", D2[0], NP2[0], V2[0], D3[0], NP3[0], RC3 % V_RC3]))

    # Second sentence: matrix verb is V1. Even options put the -ing form
    # on the relative-clause verb, odd options on the embedded verb V2.
    option = random.randint(0, 5)
    if option == 0:
        data.append(" ".join([D1[0], NP1[0], RC1 % V_RC1_ing, V1[0], "that", D2[0], NP2[0], V2[0], D3[0], NP3[0]]))
    elif option == 1:
        data.append(" ".join([D1[0], NP1[0], RC1 % V_RC1, V1[0], "that", D2[0], NP2[0], V2_ing[0], D3[0], NP3[0]]))
    elif option == 2:
        data.append(" ".join([D1[0], NP1[0], V1[0], "that", D2[0], NP2[0], RC2 % V_RC2_ing, V2[0], D3[0], NP3[0]]))
    elif option == 3:
        data.append(" ".join([D1[0], NP1[0], V1[0], "that", D2[0], NP2[0], RC2 % V_RC2, V2_ing[0], D3[0], NP3[0]]))
    elif option == 4:
        data.append(" ".join([D1[0], NP1[0], V1[0], "that", D2[0], NP2[0], V2[0], D3[0], NP3[0], RC3 % V_RC3_ing]))
    else:
        data.append(" ".join([D1[0], NP1[0], V1[0], "that", D2[0], NP2[0], V2_ing[0], D3[0], NP3[0], RC3 % V_RC3]))
    return data, track_sentence
# sample sentences until desired number while len(sentences) < number_to_generate: # ever sentences # sentence template # The N1 wonder whether N2 ever V1 N3 # The girls wonder whether James ever ate apples # build all lexical items try: N1 = choice(all_non_singular_animate_nouns) N2 = choice(all_animate_nouns, [N1]) V1 = choice( get_matched_by(N2, "arg_1", all_non_progressive_transitive_verbs)) V1 = conjugate(V1, N2, allow_negated=False) N3 = choice(get_matches_of(V1, "arg_2", all_non_singular_nouns), [N1, N2]) decoy = choice( ["often", "also", "obviously", "clearly", "fortunately"]) except IndexError: print(N2[0], V1[0]) continue # build sentences with question environment sentence_1 = "The %s wonder whether the %s ever %s the %s." % ( N1[0], N2[0], V1[0], N3[0]) sentence_2 = "The %s wonder whether the %s %s %s the %s." % ( N1[0], N2[0], decoy, V1[0], N3[0]) sentence_3 = "The %s ever wonder whether the %s %s the %s." % ( N1[0], N2[0], V1[0], N3[0])
def sample_nested_rc(self):
    """Sample one pair of sentences with a nested relative clause.

    The sentence frame is "D1 NP1 V1 D2 NP2" with a two-level relative
    clause attached to NP1 or NP2.

    Returns a ``(data, track_sentence)`` pair:

    * ``data`` -- two space-joined sentences. The first uses ``V1_ing`` as
      the main verb; the second uses ``V1`` and places a single ``*_ing``
      form on the outer or the inner relative-clause verb.
    * ``track_sentence`` -- two identical ``(S1, RC1, RC2)`` tuples.

    The order of the random draws below determines the sample, so
    statements must not be reordered.
    """
    V1 = choice(self.all_non_ing_transitive_verbs)
    V1_ing = self.get_ing_form(V1)
    NP1 = choice(get_matches_of(V1, "arg_1", self.safe_nouns))
    V1 = conjugate(V1, NP1)
    V1_ing = conjugate(V1_ing, NP1)
    D1 = choice(get_matched_by(NP1, "arg_1", all_very_common_dets))
    NP2 = choice(get_matches_of(V1, "arg_2", self.safe_nouns))
    D2 = choice(get_matched_by(NP2, "arg_1", all_very_common_dets))
    S1 = " ".join([D1[0], NP1[0], "%s", D2[0], NP2[0]])

    # Outer/inner relative clause on NP1: subject+subject, object+subject
    # or subject+object.
    option = random.randint(0, 2)
    if option == 0:
        RC1, arg_RC1, V_RC1, V_RC1_ing = self.subject_relative_clause(
            NP1, bind=True)
        RC1_b, _, V_RC1_b, V_RC1_ing_b = self.subject_relative_clause(
            arg_RC1, bind=False)
    elif option == 1:
        RC1, arg_RC1, V_RC1, V_RC1_ing = self.object_relative_clause(
            NP1, bind=True)
        RC1_b, _, V_RC1_b, V_RC1_ing_b = self.subject_relative_clause(
            arg_RC1, bind=False)
    else:
        RC1, arg_RC1, V_RC1, V_RC1_ing = self.subject_relative_clause(
            NP1, bind=True)
        RC1_b, _, V_RC1_b, V_RC1_ing_b = self.object_relative_clause(
            arg_RC1, bind=False)

    # Same three combinations for the relative clause on NP2.
    option = random.randint(0, 2)
    if option == 0:
        RC2, arg_RC2, V_RC2, V_RC2_ing = self.subject_relative_clause(
            NP2, bind=True)
        RC2_b, _, V_RC2_b, V_RC2_ing_b = self.subject_relative_clause(
            arg_RC2, bind=False)
    elif option == 1:
        RC2, arg_RC2, V_RC2, V_RC2_ing = self.object_relative_clause(
            NP2, bind=True)
        RC2_b, _, V_RC2_b, V_RC2_ing_b = self.subject_relative_clause(
            arg_RC2, bind=False)
    else:
        RC2, arg_RC2, V_RC2, V_RC2_ing = self.subject_relative_clause(
            NP2, bind=True)
        RC2_b, _, V_RC2_b, V_RC2_ing_b = self.object_relative_clause(
            arg_RC2, bind=False)

    track_sentence = [(S1, RC1, RC2), (S1, RC1, RC2)]
    data = []

    # First sentence: main verb is V1_ing; nested clause on NP1 or on NP2.
    option = random.randint(0, 1)
    if option == 0:
        data.append(" ".join([
            D1[0], NP1[0],
            RC1.format(v=V_RC1, rc=(RC1_b % V_RC1_b)), V1_ing[0], D2[0], NP2[0]
        ]))
    else:
        data.append(" ".join([
            D1[0], NP1[0], V1_ing[0], D2[0], NP2[0],
            RC2.format(v=V_RC2, rc=(RC2_b % V_RC2_b))
        ]))

    # Second sentence: main verb is V1; the option picks which
    # relative-clause verb (outer/inner, on NP1/NP2) takes the -ing form.
    option = random.randint(0, 3)
    if option == 0:
        data.append(" ".join([
            D1[0], NP1[0],
            RC1.format(v=V_RC1_ing, rc=(RC1_b % V_RC1_b)), V1[0], D2[0], NP2[0]
        ]))
    elif option == 1:
        data.append(" ".join([
            D1[0], NP1[0],
            RC1.format(v=V_RC1, rc=(RC1_b % V_RC1_ing_b)), V1[0], D2[0], NP2[0]
        ]))
    elif option == 2:
        data.append(" ".join([
            D1[0], NP1[0], V1[0], D2[0], NP2[0],
            RC2.format(v=V_RC2_ing, rc=(RC2_b % V_RC2_b))
        ]))
    else:
        data.append(" ".join([
            D1[0], NP1[0], V1[0], D2[0], NP2[0],
            RC2.format(v=V_RC2, rc=(RC2_b % V_RC2_ing_b))
        ]))
    return data, track_sentence
def __join_biedges(self, be1, be2):
    """Condense two consecutive bi-edges, and their conjugates, into one.

    Before::

        u ---be1---> v ---be2---> w
        z <--be4---- y <--be3---- x

    After::

        u --------beA--------> w
        z <-------beB--------- x

    where ``be3 = be2.conj`` and ``be4 = be1.conj``. The inner vertices
    ``v`` and ``y`` must each have exactly one incoming and one outgoing
    edge; they are detached and removed from ``self.vs``. The joined edges
    replace the originals in ``self.es``.

    Nothing is changed (the method just returns) when an edge equals the
    conjugate of its neighbour (``be1 == be3`` or ``be2 == be4``) or when
    both edges are self-conjugate (``be1 == be4 and be2 == be3``).

    Remaining layouts:

    * ``be2 == be3`` (loop on the right): be1, be2, be4 become one
      self-conjugate edge u -> z.
    * ``be1 == be4`` (loop on the left): be3, be1, be2 become one
      self-conjugate edge x -> w.
    * otherwise: two conjugate edges u -> w and x -> z.

    When ``self.test_utils`` is set, each junction is scored through
    ``should_join`` and tallied in ``join_correct`` / ``join_incorrect``.
    The scoring is skipped in every layout when ``self.test_utils`` is
    falsy; the loop layouts used to call it unconditionally and failed
    without test utilities.
    """
    be3 = be2.conj
    be4 = be1.conj
    u, v, w = be1.v1, be1.v2, be2.v2
    x, y, z = be3.v1, be3.v2, be4.v2
    assert be1.v2 == be2.v1
    assert 1 == len(v.inn) == len(v.out) == len(y.inn) == len(y.out), (
        be1.eid, be2.eid, len(v.inn), len(v.out), len(y.inn), len(y.out))

    # Degenerate layouts are left uncondensed. After this guard be1 != be3
    # and be2 != be4 always hold, so the former asserts were redundant.
    if be1 == be3 or be2 == be4:
        return

    def tally_joins(*junctions):
        # Score every (left, right) junction; no-op without test utilities.
        if not self.test_utils:
            return
        # Evaluate all junctions first, then update the counters, matching
        # the original evaluation order.
        verdicts = [
            self.test_utils.should_join(left.diagonals[-1], right.diagonals[0])
            for left, right in junctions
        ]
        for verdict in verdicts:
            if verdict:
                self.test_utils.join_correct += 1
            else:
                self.test_utils.join_incorrect += 1

    if be1 == be4 and be2 == be3:
        assert z == v == x
        assert u == y == w
        return  # TODO: think how to condense better, rare case!!!!

    if be2 == be3:  # loop on the right: be1->be2=be3->be4
        assert v == x
        assert y == w
        beA = BEdge(u, z, None)
        beA.diagonals = be1.diagonals + be2.diagonals + be4.diagonals
        tally_joins((be1, be2), (be2, be4))
        conjugate(beA, beA)
        self.es[beA.eid] = beA
        u.out.remove(be1)
        w.inn.remove(be2)
        z.inn.remove(be4)
        del self.es[be1.eid]
        del self.es[be2.eid]
        del self.es[be4.eid]
    elif be1 == be4:  # loop on the left: be3->be1=be4->be2
        assert u == y
        assert z == v
        beA = BEdge(x, w, None)
        beA.diagonals = be3.diagonals + be1.diagonals + be2.diagonals
        tally_joins((be3, be1), (be1, be2))
        conjugate(beA, beA)
        self.es[beA.eid] = beA
        u.out.remove(be1)
        w.inn.remove(be2)
        x.out.remove(be3)
        del self.es[be1.eid]
        del self.es[be2.eid]
        del self.es[be3.eid]
    else:  # most usual case
        assert len(set([be1, be2, be3, be4])) == 4, (be1, be2, be3, be4)  # all different
        # The stricter checks on how many of u, v, w, x, y, z may coincide
        # are intentionally disabled in this version.
        # TODO: check (x == u and w == z)
        beA = BEdge(u, w, None)
        beA.diagonals = be1.diagonals + be2.diagonals
        tally_joins((be1, be2), (be3, be4))
        beB = BEdge(x, z, None)
        beB.diagonals = be3.diagonals + be4.diagonals
        conjugate(beA, beB)
        self.es[beA.eid] = beA
        self.es[beB.eid] = beB
        u.out.remove(be1)
        w.inn.remove(be2)
        x.out.remove(be3)
        z.inn.remove(be4)
        del self.es[be1.eid]
        del self.es[be2.eid]
        del self.es[be3.eid]
        del self.es[be4.eid]

    # The inner vertices are no longer attached to anything: detach and drop.
    v.inn, v.out = [], []
    y.inn, y.out = [], []
    self.vs.pop(v.key)
    self.vs.pop(y.key)