Пример #1
0
def buildChart(tokens,lexicon,combinators,terminators):
    """Build a parse chart bottom-up, yielding it each time a full-span item is accepted.

    This is a generator implementing what the original comments call
    "modified CYK parsing" with normal-form filtering.  The helpers
    ``lexify`` and ``catname``, the marker ``FORALL`` and the combinator
    objects ``LApp``, ``RApp``, ``LB``, ``RB``, ``LBx``, ``RBx`` are looked up
    in the enclosing module scope.

    Args:
        tokens: Token sequence.  Slices ``tokens[n:m+1]`` are used directly as
            lexicon keys, so a slice must be hashable (e.g. a tuple or str).
        lexicon: Mapping from a token slice to an iterable of category
            descriptions; each one is passed through ``lexify``.
        combinators: Callables with a ``__name__``.  Those taking one argument
            are applied as unary rules (the "type raising" step), those taking
            two arguments as binary rules.  A combinator returns ``None`` when
            it does not apply.
        terminators: ``None`` to accept every category that spans the whole
            input, otherwise a container of category names tested against
            ``catname(category)``.

    Yields:
        The chart dict itself (the same object every time, mutated as parsing
        proceeds).  It maps inclusive ``(start, end)`` spans to lists of
        ``(category, path)`` where ``path`` is one of
        ``(depth,)`` for a lexical item,
        ``(index, combinator_name, depth)`` for a unary derivation, or
        ``(left_index, right_index, left_end, combinator_name, depth)`` for a
        binary derivation.
    """
    # Number of derivations seen per (category name, start, end).
    counter = {}

    #-- normal form parsing
    def nf_check(fc, path1, path2):
        """Return False if combinator ``fc`` must not combine items derived via these paths."""
        #-- restrictions on type-raising and composition
        if len(path2) == 3 and fc == RBx and path2[1] == "LT":
            return False
        if len(path1) == 3 and fc == LBx and path1[1] == "RT":
            return False
        #-- conjunction modality
        if len(path2) == 5 and path2[3] == "Conj" and fc != LApp:
            return False
        #-- NF constraint 1 and 2
        if len(path1) == 5 and path1[3] in ["LB", "LBx"] and fc in [LB, LBx]:
            return False
        if len(path2) == 5 and path2[3] in ["LB", "LBx"] and fc == LApp:
            return False
        if len(path1) == 5 and path1[3] in ["RB", "RBx"] and fc in [RApp, RB, RBx]:
            return False
        #-- NF constraint 5
        if len(path1) == 3 and (fc, path1[1]) in [(RApp, "RT")]:
            return False
        #-- NF constraint 5
        if len(path2) == 3 and (fc, path2[1]) in [(LApp, "LT")]:
            return False
        return True

    # inspect.getargspec was removed in Python 3.11.  Prefer getfullargspec and
    # fall back to the old name only on interpreters that do not provide it.
    get_argspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec

    def getNargs(f):
        """Return the number of positional parameters a combinator accepts."""
        if inspect.isfunction(f):
            return len(get_argspec(f).args)
        # Callable object: the bound __call__ still lists ``self``.
        return len(get_argspec(f.__call__).args) - 1

    unary_combinators = [f for f in combinators if getNargs(f) == 1]
    binary_combinators = [f for f in combinators if getNargs(f) == 2]

    chart = {}
    max_depth = 0
    N = len(tokens)

    #-- seed every span with its lexical categories (depth 0)
    for n in range(N):
        for m in range(n, N):
            lexical = [(lexify(c), (max_depth,)) for c in lexicon.get(tokens[n:m+1], [])]
            #-- add type raising
            raised = []
            for idx0, (cat, _) in enumerate(lexical):
                for f in unary_combinators:
                    cat2 = f(cat)
                    if cat2 is not None:
                        raised.append((cat2, (idx0, f.__name__, max_depth)))
            chart[(n, m)] = lexical + raised

    # Lexical entries that already span the whole input count as results.
    for cat2, _ in chart.get((0, N-1), []):
        if terminators is None or catname(cat2) in terminators:
            yield chart

    # Give up unless every token position is covered by at least one
    # non-empty span.
    covered = all(
        any(len(chart.get((m0, m1), [])) > 0 for (m0, m1) in chart if m0 <= n <= m1)
        for n in range(N)
    )
    if not covered:
        return

    #-- modified CYK parsing
    for max_depth in range(0, N):
        new_items = []
        # Adjacent (left span, right span) pairs where at least one span is
        # at least ``max_depth`` tokens wide beyond its first token.
        all_pairs = set()
        for (s1, e1) in chart:
            for (s2, e2) in chart:
                if (e1 - s1) < max_depth and (e2 - s2) < max_depth:
                    continue
                elif s2 == e1 + 1:
                    all_pairs.add((s1, e1, s2, e2))
                elif s1 == e2 + 1:
                    all_pairs.add((s2, e2, s1, e1))

        for (left_start, left_end, right_start, right_end) in all_pairs:
            is_full_span = (left_start == 0 and right_end == N-1)
            for idx1, (Lcat, Lpath) in enumerate(chart.get((left_start, left_end), [])):
                Ldepth = Lpath[-1]
                for idx2, (Rcat, Rpath) in enumerate(chart.get((right_start, right_end), [])):
                    Rdepth = Rpath[-1]
                    # Only combine when the deeper of the two items was
                    # produced exactly at the current depth.
                    if max(Ldepth, Rdepth) != max_depth:
                        continue
                    if type(Lcat) == list and type(Rcat) == list and Lcat[0] == FORALL and Rcat[0] == FORALL:
                        continue
                    for f in binary_combinators:
                        if not nf_check(f, Lpath, Rpath):
                            continue
                        cat2 = f(Lcat, Rcat)
                        if cat2 is None:
                            continue
                        name = catname(cat2)
                        key = (name, left_start, right_end)
                        counter[key] = counter.get(key, 0) + 1
                        path = (idx1, idx2, left_end, f.__name__, max_depth+1)
                        if is_full_span:
                            # Full-span items are recorded and reported
                            # immediately, but only if they are acceptable.
                            if terminators is None or name in terminators:
                                chart.setdefault((left_start, right_end), []).append((cat2, path))
                                yield chart
                        elif counter[key] < 2:
                            # Keep only the first derivation of a category
                            # name for a given span.
                            new_items.append((left_start, right_end, cat2, path))
                        # Stop at the first combinator that applies to this
                        # pair (the original author marked this "is it OK?").
                        break

        # Items are added only after the scan so the lists being enumerated
        # above are not extended mid-iteration.
        for (left_start, right_end, cat2, path) in new_items:
            chart.setdefault((left_start, right_end), []).append((cat2, path))

        #-- add type raising
        rest = []
        for (left_start, right_end) in chart:
            if left_start == 0 and right_end == N-1:
                continue
            for idx, (cat, path0) in enumerate(chart.get((left_start, right_end), [])):
                assert cat is not None, cat
                # Only items created in this round are raised.
                if path0[-1] != max_depth+1:
                    continue
                if len(path0) == 5 and path0[3] in ("Conj", "SkipComma"):
                    continue
                for f in unary_combinators:
                    cat2 = f(cat)
                    if cat2 is not None:
                        rest.append((left_start, right_end, cat2, (idx, f.__name__, max_depth+1)))
        for (left_start, right_end, cat2, path) in rest:
            chart.setdefault((left_start, right_end), []).append((cat2, path))
Пример #2
0
def buildChart(tokens, lexicon, combinators, terminators):
    """Build a parse chart bottom-up, yielding it each time a full-span item is accepted.

    This is a generator implementing what the original comments call
    "modified CYK parsing" with normal-form filtering.  The helpers
    ``lexify`` and ``catname``, the marker ``FORALL`` and the combinator
    objects ``LApp``, ``RApp``, ``LB``, ``RB``, ``LBx``, ``RBx`` are looked up
    in the enclosing module scope.

    Args:
        tokens: Token sequence.  Slices ``tokens[n:m+1]`` are used directly as
            lexicon keys, so a slice must be hashable (e.g. a tuple or str).
        lexicon: Mapping from a token slice to an iterable of category
            descriptions; each one is passed through ``lexify``.
        combinators: Callables with a ``__name__``.  Those taking one argument
            are applied as unary rules (the "type raising" step), those taking
            two arguments as binary rules.  A combinator returns ``None`` when
            it does not apply.
        terminators: ``None`` to accept every category that spans the whole
            input, otherwise a container of category names tested against
            ``catname(category)``.

    Yields:
        The chart dict itself (the same object every time, mutated as parsing
        proceeds).  It maps inclusive ``(start, end)`` spans to lists of
        ``(category, path)`` where ``path`` is one of
        ``(depth,)`` for a lexical item,
        ``(index, combinator_name, depth)`` for a unary derivation, or
        ``(left_index, right_index, left_end, combinator_name, depth)`` for a
        binary derivation.
    """
    # Number of derivations seen per (category name, start, end).
    counter = {}

    #-- normal form parsing
    def nf_check(fc, path1, path2):
        """Return False if combinator ``fc`` must not combine items derived via these paths."""
        #-- restrictions on type-raising and composition
        if len(path2) == 3 and fc == RBx and path2[1] == "LT":
            return False
        if len(path1) == 3 and fc == LBx and path1[1] == "RT":
            return False
        #-- conjunction modality
        if len(path2) == 5 and path2[3] == "Conj" and fc != LApp:
            return False
        #-- NF constraint 1 and 2
        if len(path1) == 5 and path1[3] in ["LB", "LBx"] and fc in [LB, LBx]:
            return False
        if len(path2) == 5 and path2[3] in ["LB", "LBx"] and fc == LApp:
            return False
        if len(path1) == 5 and path1[3] in ["RB", "RBx"] and fc in [RApp, RB, RBx]:
            return False
        #-- NF constraint 5
        if len(path1) == 3 and (fc, path1[1]) in [(RApp, "RT")]:
            return False
        #-- NF constraint 5
        if len(path2) == 3 and (fc, path2[1]) in [(LApp, "LT")]:
            return False
        return True

    # inspect.getargspec was removed in Python 3.11.  Prefer getfullargspec and
    # fall back to the old name only on interpreters that do not provide it.
    get_argspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec

    def getNargs(f):
        """Return the number of positional parameters a combinator accepts."""
        if inspect.isfunction(f):
            return len(get_argspec(f).args)
        # Callable object: the bound __call__ still lists ``self``.
        return len(get_argspec(f.__call__).args) - 1

    unary_combinators = [f for f in combinators if getNargs(f) == 1]
    binary_combinators = [f for f in combinators if getNargs(f) == 2]

    chart = {}
    max_depth = 0
    N = len(tokens)

    #-- seed every span with its lexical categories (depth 0)
    for n in range(N):
        for m in range(n, N):
            lexical = [(lexify(c), (max_depth,))
                       for c in lexicon.get(tokens[n:m + 1], [])]
            #-- add type raising
            raised = []
            for idx0, (cat, _) in enumerate(lexical):
                for f in unary_combinators:
                    cat2 = f(cat)
                    if cat2 is not None:
                        raised.append((cat2, (idx0, f.__name__, max_depth)))
            chart[(n, m)] = lexical + raised

    # Lexical entries that already span the whole input count as results.
    for cat2, _ in chart.get((0, N - 1), []):
        if terminators is None or catname(cat2) in terminators:
            yield chart

    # Give up unless every token position is covered by at least one
    # non-empty span.
    covered = all(
        any(
            len(chart.get((m0, m1), [])) > 0 for (m0, m1) in chart
            if m0 <= n <= m1
        ) for n in range(N)
    )
    if not covered:
        return

    #-- modified CYK parsing
    for max_depth in range(0, N):
        new_items = []
        # Adjacent (left span, right span) pairs where at least one span is
        # at least ``max_depth`` tokens wide beyond its first token.
        all_pairs = set()
        for (s1, e1) in chart:
            for (s2, e2) in chart:
                if (e1 - s1) < max_depth and (e2 - s2) < max_depth:
                    continue
                elif s2 == e1 + 1:
                    all_pairs.add((s1, e1, s2, e2))
                elif s1 == e2 + 1:
                    all_pairs.add((s2, e2, s1, e1))

        for (left_start, left_end, right_start, right_end) in all_pairs:
            is_full_span = (left_start == 0 and right_end == N - 1)
            left_items = chart.get((left_start, left_end), [])
            for idx1, (Lcat, Lpath) in enumerate(left_items):
                Ldepth = Lpath[-1]
                right_items = chart.get((right_start, right_end), [])
                for idx2, (Rcat, Rpath) in enumerate(right_items):
                    Rdepth = Rpath[-1]
                    # Only combine when the deeper of the two items was
                    # produced exactly at the current depth.
                    if max(Ldepth, Rdepth) != max_depth:
                        continue
                    if (type(Lcat) == list and type(Rcat) == list
                            and Lcat[0] == FORALL and Rcat[0] == FORALL):
                        continue
                    for f in binary_combinators:
                        if not nf_check(f, Lpath, Rpath):
                            continue
                        cat2 = f(Lcat, Rcat)
                        if cat2 is None:
                            continue
                        name = catname(cat2)
                        key = (name, left_start, right_end)
                        counter[key] = counter.get(key, 0) + 1
                        path = (idx1, idx2, left_end, f.__name__,
                                max_depth + 1)
                        if is_full_span:
                            # Full-span items are recorded and reported
                            # immediately, but only if they are acceptable.
                            if terminators is None or name in terminators:
                                chart.setdefault(
                                    (left_start, right_end),
                                    []).append((cat2, path))
                                yield chart
                        elif counter[key] < 2:
                            # Keep only the first derivation of a category
                            # name for a given span.
                            new_items.append(
                                (left_start, right_end, cat2, path))
                        # Stop at the first combinator that applies to this
                        # pair (the original author marked this "is it OK?").
                        break

        # Items are added only after the scan so the lists being enumerated
        # above are not extended mid-iteration.
        for (left_start, right_end, cat2, path) in new_items:
            chart.setdefault((left_start, right_end), []).append(
                (cat2, path))

        #-- add type raising
        rest = []
        for (left_start, right_end) in chart:
            if left_start == 0 and right_end == N - 1:
                continue
            for idx, (cat, path0) in enumerate(
                    chart.get((left_start, right_end), [])):
                assert cat is not None, cat
                # Only items created in this round are raised.
                if path0[-1] != max_depth + 1:
                    continue
                if len(path0) == 5 and path0[3] in ("Conj", "SkipComma"):
                    continue
                for f in unary_combinators:
                    cat2 = f(cat)
                    if cat2 is not None:
                        rest.append(
                            (left_start, right_end, cat2,
                             (idx, f.__name__, max_depth + 1)))
        for (left_start, right_end, cat2, path) in rest:
            chart.setdefault((left_start, right_end), []).append(
                (cat2, path))
Пример #3
0
 def __init__(self, categoryName):
     """Store the lexified form of ``categoryName`` and publish the class name.

     ``categoryName`` is passed to the module-level ``lexify`` helper and the
     result is kept in ``self.category``.
     """
     parsed_category = lexify(categoryName)
     self.category = parsed_category
     # Instances carry a ``__name__`` equal to their class name
     # (presumably so they can be used where a function's __name__ is read
     # -- confirm against callers).
     self.__name__ = type(self).__name__
Пример #4
0
 def __init__(self, categoryName):
     """Initialise the instance from a category description.

     Sets ``self.category`` to ``lexify(categoryName)`` (``lexify`` comes from
     module scope) and ``self.__name__`` to the name of the instance's class.
     """
     cls_name = type(self).__name__
     self.category = lexify(categoryName)
     self.__name__ = cls_name