def populate_dense(self):
    """Accumulate projected root, binary-rule and unary-rule statistics.

    For every tree in ``config.train``:

    * ``config.Inode[tree]`` is multiplied by ``config.proj[label][0]`` and
      summed into ``self.pi[label]`` (``label`` is ``tree.label()``);
    * every two-child node adds the outer product of its projected
      ``config.Onode`` vector and its two children's projected
      ``config.Inode`` vectors to ``self.rule3s[Rule3(a, b, c)]``;
    * every one-child node adds its projected ``config.Onode`` vector to
      ``self.rule1s[Rule1(a, x)]``.

    Afterwards each ``self.pi`` entry is divided by ``len(config.train)``.

    NOTE(review): ``Inode``/``Onode`` presumably hold dense inside/outside
    feature vectors keyed by node -- confirm against where they are built.

    Raises:
        RuntimeError: if a node has neither one nor two children.
    """
    projections = config.proj
    inside, outside = config.Inode, config.Onode

    for tree in tqdm(config.train, desc='Doing projections'):
        root_label = tree.label()
        root_vec = np.dot(projections[root_label][0], inside[tree])
        if root_label in self.pi:
            self.pi[root_label] += root_vec
        else:
            self.pi[root_label] = root_vec

        for node in tree.postorder():
            arity = len(node)

            if arity == 1:
                # Unary node: parent label plus its single child ``node[0]``.
                parent, child = node.label(), node[0]
                out_vec = np.dot(projections[parent][1], outside[node])
                rule = Rule1(parent, child)
                if rule in self.rule1s:
                    self.rule1s[rule] += out_vec
                else:
                    self.rule1s[rule] = out_vec
                continue

            if arity != 2:
                raise RuntimeError

            # Binary node: index [1] of a projection pair is applied to
            # Onode vectors, index [0] to Inode vectors.
            parent = node.label()
            left, right = node[0].label(), node[1].label()
            out_proj = projections[parent][1]
            left_proj = projections[left][0]
            right_proj = projections[right][0]
            out_vec = np.dot(out_proj, outside[node])
            left_vec = np.dot(left_proj, inside[node[0]])
            right_vec = np.dot(right_proj, inside[node[1]])
            rule = Rule3(parent, left, right)
            outer = np.einsum('i,j,k->ijk', out_vec, left_vec, right_vec)
            if rule in self.rule3s:
                self.rule3s[rule] += outer
            else:
                self.rule3s[rule] = outer

    for label, total in self.pi.items():
        self.pi[label] = total / len(config.train)
def populate(self):
    """Count root labels, node labels and rules over ``config.train``.

    Per tree, ``self.pi[tree.label()]`` is incremented. Per node of
    ``tree.postorder()``, ``self.nonterminals[label]`` is incremented and
    then either ``self.rule3s[Rule3(label, left, right)]`` (two children)
    or ``self.rule1s[Rule1(label, child)]`` (one child).

    Finally every ``self.pi`` count is divided by ``len(config.train)``.

    The tables are updated with ``+= 1`` and never initialised here, so
    they are assumed to default missing keys to 0 (e.g. ``Counter``) --
    confirm against the constructor.

    Raises:
        RuntimeError: if a node has neither one nor two children.
    """
    for tree in config.train:
        self.pi[tree.label()] += 1

        for node in tree.postorder():
            parent = node.label()
            self.nonterminals[parent] += 1

            width = len(node)
            if width == 1:
                self.rule1s[Rule1(parent, node[0])] += 1
                continue
            if width != 2:
                raise RuntimeError
            self.rule3s[Rule3(parent, node[0].label(), node[1].label())] += 1

    for label, count in self.pi.items():
        self.pi[label] = count / len(config.train)
def populate(self):
    """Collect symbol and rule counts from ``config.train``.

    For each node of ``tree.postorder()``:

    * ``self.nonterminals[label]`` is always incremented;
    * a two-child node also increments ``self.interminals[label]`` and
      ``self.rule3s_count[Rule3(label, left, right)]``;
    * a one-child node also increments ``self.terminals[child]``,
      ``self.preterminals[label]`` and
      ``self.rule1s_count[Rule1(label, child)]``.

    After a tree's nodes are processed, ``self.pi[tree.label()]`` is
    incremented; at the very end each ``self.pi`` count is divided by
    ``len(config.train)``.

    All tables are updated with ``+= 1`` without initialisation here, so
    they are assumed to default missing keys to 0 -- confirm.

    Raises:
        RuntimeError: if a node has neither one nor two children.
    """
    for tree in tqdm(config.train, desc='Doing vanilla PCFG'):
        for node in tree.postorder():
            head = node.label()
            self.nonterminals[head] += 1

            fanout = len(node)
            if fanout == 2:
                self.interminals[head] += 1
                self.rule3s_count[
                    Rule3(head, node[0].label(), node[1].label())
                ] += 1
            elif fanout == 1:
                word = node[0]
                self.terminals[word] += 1
                self.preterminals[head] += 1
                self.rule1s_count[Rule1(head, word)] += 1
            else:
                raise RuntimeError

        self.pi[tree.label()] += 1

    for label, count in self.pi.items():
        self.pi[label] = count / len(config.train)
def populate(self):
    """Accumulate projected statistics from per-node feature rows.

    Same bookkeeping as a dense projection pass, but each feature row is
    multiplied with its projection matrix only over the columns where the
    row is nonzero:

    * the root row ``config.Inode[tree]`` projected with
      ``config.proj[label][0]`` is summed into ``self.pi[label]``;
    * a two-child node adds the outer product of its projected
      ``config.Onode`` row and its children's projected ``config.Inode``
      rows to ``self.rule3s[Rule3(a, b, c)]``;
    * a one-child node adds its projected ``config.Onode`` row to
      ``self.rule1s[Rule1(a, x)]``.

    Finally every ``self.pi`` entry is divided by ``len(config.train)``.

    NOTE(review): the rows are indexed as ``row[0, cols]`` and converted
    with ``.toarray()``, so they are presumably 1 x d scipy.sparse
    matrices -- confirm.

    Raises:
        RuntimeError: if a node has neither one nor two children.
    """
    projections = config.proj
    inside, outside = config.Inode, config.Onode

    def apply_projection(matrix, row):
        # Only columns where ``row`` is nonzero can contribute, so both
        # operands are sliced down to those columns before multiplying.
        cols = row.nonzero()[1]
        return np.sum(matrix[:, cols] * row[0, cols].toarray()[0], axis=1)

    for tree in tqdm(config.train, desc='Doing projections'):
        root_label = tree.label()
        root_vec = apply_projection(projections[root_label][0], inside[tree])
        if root_label in self.pi:
            self.pi[root_label] += root_vec
        else:
            self.pi[root_label] = root_vec

        for node in tree.postorder():
            n_children = len(node)

            if n_children == 2:
                parent = node.label()
                left, right = node[0].label(), node[1].label()
                # Index [1] of a projection pair goes with Onode rows,
                # index [0] with Inode rows.
                out_proj = projections[parent][1]
                left_proj = projections[left][0]
                right_proj = projections[right][0]
                out_vec = apply_projection(out_proj, outside[node])
                left_vec = apply_projection(left_proj, inside[node[0]])
                right_vec = apply_projection(right_proj, inside[node[1]])
                rule = Rule3(parent, left, right)
                outer = np.einsum('i,j,k->ijk', out_vec, left_vec, right_vec)
                if rule in self.rule3s:
                    self.rule3s[rule] += outer
                else:
                    self.rule3s[rule] = outer
            elif n_children == 1:
                parent, child = node.label(), node[0]
                out_vec = apply_projection(projections[parent][1], outside[node])
                rule = Rule1(parent, child)
                if rule in self.rule1s:
                    self.rule1s[rule] += out_vec
                else:
                    self.rule1s[rule] = out_vec
            else:
                raise RuntimeError

    for label, total in self.pi.items():
        self.pi[label] = total / len(config.train)
def populate(self):
    """Accumulate rule and root statistics from per-label vector stores.

    ``config.I[label]`` and ``config.O[label]`` are read sequentially: a
    per-label cursor (``pI`` / ``pO``, also published as ``config.pI`` /
    ``config.pO``) selects the next entry each time a label is consumed.

    For every tree in ``config.train``:

    * a two-child node consumes one ``O`` entry for its own label and one
      ``I`` entry for each child label, and adds their outer product to
      ``self.rule3s[Rule3(a, b, c)]``;
    * a one-child node consumes one ``O`` entry for its label and adds it
      to ``self.rule1s[Rule1(a, x)]``;
    * after the node loop, one ``I`` entry is consumed for the label of
      the last node yielded by ``postorder()`` (presumably the root --
      confirm) and added to ``self.pi``.

    Finally every ``self.pi`` entry is divided by ``len(config.train)``.

    The first vector stored under a key is a copy. Storing the entry
    itself would make the later in-place ``+=`` write into
    ``config.I`` / ``config.O``.

    NOTE(review): entries are assumed to be 1-D numpy arrays (they are fed
    to ``np.einsum('i,j,k->ijk', ...)``) -- confirm.

    Raises:
        RuntimeError: if a node has neither one nor two children.
    """
    I, O = config.I, config.O
    pI, pO = Counter(), Counter()
    # Same Counter objects are shared with config, so the cursor positions
    # reached here are visible there as well.
    config.pI, config.pO = pI, pO

    def take(store, cursor, label):
        # Return the next unread entry for ``label`` and advance its cursor.
        vec = store[label][cursor[label]]
        cursor[label] += 1
        return vec

    for tree in tqdm(config.train, desc='Constructing L-PCFG'):
        for node in tree.postorder():
            if len(node) == 2:
                a, b, c = node.label(), node[0].label(), node[1].label()
                Zi = take(O, pO, a)
                Yj = take(I, pI, b)
                Yk = take(I, pI, c)
                r = Rule3(a, b, c)
                # einsum allocates a new array, so no copy is needed here.
                outer = np.einsum('i,j,k->ijk', Zi, Yj, Yk)
                if r in self.rule3s:
                    self.rule3s[r] += outer
                else:
                    self.rule3s[r] = outer
            elif len(node) == 1:
                a, x = node.label(), node[0]
                Z = take(O, pO, a)
                r = Rule1(a, x)
                if r in self.rule1s:
                    self.rule1s[r] += Z
                else:
                    # Copy: Z is an entry of config.O and must not become
                    # the accumulator that ``+=`` mutates later.
                    self.rule1s[r] = Z.copy()
            else:
                raise RuntimeError(
                    'expected a node with 1 or 2 children, got %d' % len(node)
                )

        # ``node`` is the last node produced by the postorder loop above.
        a = node.label()
        Y = take(I, pI, a)
        if a in self.pi:
            self.pi[a] += Y
        else:
            # Copy for the same reason as above (Y is an entry of config.I).
            self.pi[a] = Y.copy()

    for a, param in self.pi.items():
        self.pi[a] = param / len(config.train)
def populate(self):
    """Accumulate projected moment tables for rules and labels.

    Each feature row from ``config.Inode`` / ``config.Onode`` is projected
    with the matching matrix in ``config.proj`` (index ``[0]`` for Inode
    rows, ``[1]`` for Onode rows), using only the row's nonzero columns.

    For every tree in ``config.train``:

    * the projected root row is summed into ``self.pi[tree.label()]``;
    * a two-child node with rule ``r = Rule3(a, b, c)`` and projected
      vectors ``Zi`` (own Onode), ``Yj`` / ``Yk`` (children's Inode) adds
      ``Zi x Yj x Yk`` to ``self.Eijk[r]``, the pairwise outer products to
      ``self.Eij[r]`` / ``self.Eik[r]`` / ``self.Ejk[r]``, the vectors
      themselves to ``self.Ei[r]`` / ``self.Ej[r]`` / ``self.Ek[r]``, and
      also ``Zi`` to ``self.H[a]``, ``Yj`` to ``self.F[b]``, ``Yk`` to
      ``self.F[c]``;
    * a one-child node with rule ``r = Rule1(a, x)`` adds its projected
      Onode vector to ``self.Eax[r]`` and ``self.H[a]``.

    Finally every ``self.pi`` entry is divided by ``len(config.train)``.

    Every table stores its own copy the first time a key is seen. Without
    that, ``Ei[r]`` and ``H[a]`` (likewise ``Ej``/``F[b]``, ``Ek``/``F[c]``)
    would be the same array object. The later in-place ``+=`` on one table
    would then also change the other, double-counting contributions.

    NOTE(review): rows are presumably 1 x d scipy.sparse matrices (they
    are indexed ``row[0, cols]`` and converted with ``.toarray()``) --
    confirm. The root projection is added to ``self.pi`` only, not to
    ``self.F`` -- confirm this is intended.

    Raises:
        RuntimeError: if a node has neither one nor two children.
    """
    p = config.proj
    I, O = config.Inode, config.Onode

    def project(matrix, row):
        # Multiply only over the columns where ``row`` is nonzero.
        idx = row.nonzero()[1]
        return np.sum(matrix[:, idx] * row[0, idx].toarray()[0], axis=1)

    def accumulate(table, key, value):
        # First sight of ``key``: keep a private copy so that no two tables
        # ever share one array. Later sights: add in place.
        if key in table:
            table[key] += value
        else:
            table[key] = deepcopy(value)

    for tree in tqdm(config.train, desc='Doing projections'):
        root = tree.label()
        accumulate(self.pi, root, project(p[root][0], I[tree]))

        for node in tree.postorder():
            if len(node) == 2:
                a, b, c = node.label(), node[0].label(), node[1].label()
                pi, pj, pk = p[a][1], p[b][0], p[c][0]
                Zi = project(pi, O[node])
                Yj = project(pj, I[node[0]])
                Yk = project(pk, I[node[1]])
                r = Rule3(a, b, c)

                moments = (
                    (self.Eijk, np.einsum('i,j,k->ijk', Zi, Yj, Yk)),
                    (self.Eij, np.einsum('i,j->ij', Zi, Yj)),
                    (self.Eik, np.einsum('i,k->ik', Zi, Yk)),
                    (self.Ejk, np.einsum('j,k->jk', Yj, Yk)),
                    (self.Ei, Zi),
                    (self.Ej, Yj),
                    (self.Ek, Yk),
                )
                # As before, membership in Eijk alone decides whether the
                # rule is new for all seven rule tables.
                seen = r in self.Eijk
                for table, value in moments:
                    if seen:
                        table[r] += value
                    else:
                        table[r] = deepcopy(value)

                # Order matters when b == c: F[b] is created first and the
                # second call then adds Yk to it.
                accumulate(self.H, a, Zi)
                accumulate(self.F, b, Yj)
                accumulate(self.F, c, Yk)
            elif len(node) == 1:
                a, x = node.label(), node[0]
                Z = project(p[a][1], O[node])
                r = Rule1(a, x)
                accumulate(self.Eax, r, Z)
                accumulate(self.H, a, Z)
            else:
                raise RuntimeError(
                    'expected a node with 1 or 2 children, got %d' % len(node)
                )

    for k, v in self.pi.items():
        self.pi[k] = v / len(config.train)
def populate(self):
    """Accumulate moment tables from per-label vector stores.

    ``config.I[label]`` and ``config.O[label]`` are consumed sequentially
    through per-label cursors, which are also published as ``config.pI``
    and ``config.pO``.

    For every tree in ``config.train``:

    * a two-child node with rule ``r = Rule3(a, b, c)`` takes the next
      ``O`` entry for ``a`` and the next ``I`` entries for ``b`` and ``c``.
      It adds their triple and pairwise outer products to ``self.Eijk[r]``,
      ``self.Eij[r]``, ``self.Eik[r]``, ``self.Ejk[r]``, and the vectors
      themselves to ``self.Ei[r]``, ``self.Ej[r]``, ``self.Ek[r]``,
      ``self.H[a]``, ``self.F[b]`` and ``self.F[c]``;
    * a one-child node with rule ``r = Rule1(a, x)`` takes the next ``O``
      entry for ``a`` and adds it to ``self.Eax[r]`` and ``self.H[a]``;
    * after the node loop, the next ``I`` entry for the label of the last
      node yielded by ``postorder()`` (presumably the root -- confirm) is
      added to ``self.pi`` and ``self.F``.

    Vectors taken from the stores are deep-copied the first time they are
    stored under a key; later contributions are added in place. Finally
    every ``self.pi`` entry is divided by ``len(config.train)``.

    Raises:
        RuntimeError: if a node has neither one nor two children.
    """
    inside, outside = config.I, config.O
    in_pos, out_pos = Counter(), Counter()
    config.pI, config.pO = in_pos, out_pos

    def next_vec(store, cursor, label):
        # Fetch the next unread entry for ``label`` and advance its cursor.
        vec = store[label][cursor[label]]
        cursor[label] += 1
        return vec

    def add_copy(table, key, vec):
        # Store a deep copy on first sight of ``key``, add in place after.
        if key not in table:
            table[key] = deepcopy(vec)
        else:
            table[key] += vec

    for tree in tqdm(config.train, desc='Constructing L-PCFG'):
        for node in tree.postorder():
            arity = len(node)

            if arity == 2:
                parent = node.label()
                left, right = node[0].label(), node[1].label()
                out_vec = next_vec(outside, out_pos, parent)
                left_vec = next_vec(inside, in_pos, left)
                right_vec = next_vec(inside, in_pos, right)
                rule = Rule3(parent, left, right)

                # (table, contribution, contribution is a store entry?)
                contributions = (
                    (self.Eijk,
                     np.einsum('i,j,k->ijk', out_vec, left_vec, right_vec),
                     False),
                    (self.Eij, np.einsum('i,j->ij', out_vec, left_vec), False),
                    (self.Eik, np.einsum('i,k->ik', out_vec, right_vec), False),
                    (self.Ejk, np.einsum('j,k->jk', left_vec, right_vec), False),
                    (self.Ei, out_vec, True),
                    (self.Ej, left_vec, True),
                    (self.Ek, right_vec, True),
                )
                # Membership in Eijk alone decides whether the rule is new
                # for all seven rule tables.
                is_new = rule not in self.Eijk
                for table, value, from_store in contributions:
                    if not is_new:
                        table[rule] += value
                    elif from_store:
                        table[rule] = deepcopy(value)
                    else:
                        table[rule] = value

                add_copy(self.H, parent, out_vec)
                add_copy(self.F, left, left_vec)
                add_copy(self.F, right, right_vec)
            elif arity == 1:
                parent, child = node.label(), node[0]
                out_vec = next_vec(outside, out_pos, parent)
                rule = Rule1(parent, child)
                add_copy(self.Eax, rule, out_vec)
                add_copy(self.H, parent, out_vec)
            else:
                raise RuntimeError

        # ``node`` still refers to the last node yielded by postorder().
        last_label = node.label()
        last_vec = next_vec(inside, in_pos, last_label)
        add_copy(self.pi, last_label, last_vec)
        add_copy(self.F, last_label, last_vec)

    for label, total in self.pi.items():
        self.pi[label] = total / len(config.train)