Пример #1
0
def _try_data(dataset="cfq/mcd1",
              validfrac=0.1,
              bertname="vanilla",
              recompute=False,
              gpu=-1):
    """Manually sanity-check the dataset by comparing repeated fetches.

    Loads the splits via ``load_ds``, prints the first training example
    twice (input and output side), then fetches each training example twice
    and counts how often the two fetched outputs are *not* judged equal by
    ``Reorderer.are_equal_trees``. All results are printed; nothing is
    returned.

    Args:
        dataset: Dataset identifier forwarded to ``load_ds``.
        validfrac: Validation fraction forwarded to ``load_ds``.
        bertname: Tokenizer/model name forwarded to ``load_ds``.
        recompute: Forwarded to ``load_ds``.
        gpu: Not used by this function.
    """
    # The reorderer returned by load_ds is discarded; a fresh one built from
    # the two vocabularies is used for the tree comparison below.
    trainds, validds, testds, fldic, inpdic, _ = load_ds(
        dataset=dataset,
        validfrac=validfrac,
        bertname=bertname,
        recompute=recompute)
    reorderer = Reorderer(inpD=inpdic, outD=fldic)

    # Fetch the same item twice and show both.
    # NOTE(review): presumably the training set applies a randomized
    # transform on access, so the two fetches can differ -- confirm.
    ex1 = trainds[0]
    ex2 = trainds[0]
    print(inpdic.tostr(ex1[0]))
    print(inpdic.tostr(ex2[0]))
    print(fldic.tostr(ex1[1]))
    print(fldic.tostr(ex2[1]))

    mismatches = 0
    tt = q.ticktock("trydata")
    tt.tick("trying data")
    # The loop indexes trainds, so bound it by len(trainds) as well; the
    # original bound (len(testds), capped at 100000) is kept so runs that
    # previously succeeded visit exactly the same examples.
    n_examples = min(len(testds), len(trainds), 100000)
    for j in range(n_examples):
        print(j, mismatches)
        ex1 = trainds[j]
        ex2 = trainds[j]
        equal = reorderer.are_equal_trees(
            taglisp_to_tree(fldic.tostr(ex1[1])),
            taglisp_to_tree(fldic.tostr(ex2[1])))
        print(equal)
        # Only an explicit False counts as a mismatch.
        if equal is False:
            mismatches += 1
    print(mismatches)
    tt.tock()
Пример #2
0
 def tensor_to_trees(x, vocab: Vocab):
     """Decode each row of ``x`` into a tree, repairing malformed strings.

     Every ``x[i]`` is rendered with ``vocab.tostr``, cleaned up, made
     parenthesis-balanced and handed to ``taglisp_to_tree``.

     Args:
         x: Indexable, sized collection of encoded sequences.
         vocab: Vocabulary whose ``tostr`` turns one sequence into a string.

     Returns:
         A list with one entry per row of ``x``: the parsed tree, or ``None``
         when parsing raised or produced a 2-tuple whose first item is
         ``None``.
     """
     trees = []
     for i in range(len(x)):
         xstr = vocab.tostr(x[i]).replace("@START@", "")
         # Strip "::<digits>" suffixes. Raw string: "\d" is not a valid
         # escape in a normal string literal and warns on modern Python.
         xstr = re.sub(r"::\d+", "", xstr)
         # drop everything after @END@, if present
         xstr = xstr.split("@END@")[0].strip()
         # add an opening parenthesis if not there
         if not xstr.startswith("("):
             xstr = "(" + xstr
         # balance parentheses: append missing closers, prepend missing openers
         imbalance = xstr.count("(") - xstr.count(")")
         if imbalance > 0:
             xstr = xstr + ")" * imbalance
         elif imbalance < 0:
             xstr = "(" * -imbalance + xstr
         try:
             tree = taglisp_to_tree(xstr)
         except Exception:
             # Best effort: any parser failure yields None for this row.
             tree = None
         else:
             # NOTE(review): a (None, ...) pair is presumably the parser's
             # way of signalling failure -- confirm against taglisp_to_tree.
             if isinstance(tree, tuple) and len(tree) == 2 \
                     and tree[0] is None:
                 tree = None
         trees.append(tree)
     return trees
Пример #3
0
    def __call__(self, x):
        """Transform one ``(input, output)`` string pair.

        The output string is parsed with ``taglisp_to_tree``, passed through
        ``self._reorder_rec`` and serialized back with ``tree_to_taglisp``.
        When ``self.reassign_ents_vars`` is truthy, the members of
        ``self.validentities`` and ``self.validvariables`` are additionally
        permuted (using ``self.rnd``) and the resulting renaming is applied
        to the whitespace-separated tokens of both strings.

        Args:
            x: A pair ``(input_string, output_string)``.

        Returns:
            The transformed ``(input_string, output_string)`` tuple.
        """
        source, target = x
        target = tree_to_taglisp(self._reorder_rec(taglisp_to_tree(target)))

        if not self.reassign_ents_vars:
            return (source, target)

        # Map each name (in sorted order) to a shuffled counterpart.
        # Entities are shuffled before variables to keep the RNG call order.
        entities = list(self.validentities)
        self.rnd.shuffle(entities)
        mapper = dict(zip(sorted(entities), entities))
        variables = list(self.validvariables)
        self.rnd.shuffle(variables)
        mapper.update(zip(sorted(variables), variables))

        # Input tokens are matched by their lowercase form and replaced by
        # the uppercased mapped name; unmatched tokens pass through as-is.
        source_tokens = []
        for token in source.split():
            key = token.lower()
            source_tokens.append(
                mapper[key].upper() if key in mapper else token)
        # Output tokens are matched and replaced verbatim.
        target_tokens = [mapper.get(token, token) for token in target.split()]

        return (" ".join(source_tokens), " ".join(target_tokens))