Пример #1
0
      if type(head) == list:
         lHead = head
      else:
         lHead = [head]
      # output the dep node
      for head in lHead:
         lTokens = []
         head_child = self.find_head(head, lTokens)
         if self.m_bLabeled:
            head_child.label = 'ROOT'
         for tokn in lTokens:
            if self.m_bLabeled:
               print "\t".join([gb2utf(tokn.token), tokn.pos, str(tokn.link), tokn.label])
            else:
               print "\t".join([gb2utf(tokn.token), tokn.pos, str(tokn.link)])
         print # empty line

#================================================================

if __name__ == '__main__':
   # Command-line entry point:
   #    ctb2dep.py rule_file config_file [log_file] > output
   from tools import config
   if len(sys.argv) < 3:
      # The usage line itself shows standard output being redirected into the
      # result file, so the message is written to standard error; otherwise it
      # would land in that file and the user would never see it.
      sys.stderr.write("\nUsage: ctb2dep.py rule_file config_file [log_file] > output\n\n")
      sys.exit(1)
   # The log file is optional. Pick it up whenever a third argument is present
   # rather than only when there are exactly three, so that trailing extra
   # arguments do not silently disable logging.
   sLogs = sys.argv[3] if len(sys.argv) > 3 else None
   # The configuration is loaded first because the rule object needs its
   # `labeled` setting.
   cf = config.CConfig(sys.argv[2])
   rule = CHeadRules(sys.argv[1], sLogs, cf.labeled)
   # Hand rule.process to the sentence iterator together with the directory
   # and range taken from the configuration.
   fiditer.sentence_iterator(rule.process, cf.directory, cf.range)
Пример #2
0
import getopt
import binarize

if __name__ == '__main__':
   # Command-line entry point:
   #    binarize.py [-nu] [-llogfile] rule_file config_file > output
   import sys
   import config

   # A single usage text shared by both failure paths, so that a bad option
   # and a missing positional argument report the same, complete help
   # (previously the bad-option path left out the -u line).
   sUsage = (
      "\nUsage: binarize.py [-nu] [-llogfile] rule_file config_file > output\n\n"
      "-n: not binarize\n\n"
      "-u: remove unary nodes\n\n"
   )

   def _usage_exit():
      # Standard output is shown redirected into the result file in the usage
      # line, so the help goes to standard error where the user can see it.
      sys.stderr.write(sUsage)
      sys.exit(1)

   try:
      opts, args = getopt.getopt(sys.argv[1:], "nul:")
   except getopt.GetoptError:
      _usage_exit()
   if len(args) < 2:
      _usage_exit()

   # Defaults: binarize, keep unary nodes, no log file.
   sLogs = None
   bBinarize = True
   bRemoveUnary = False
   for sFlag, sValue in opts:
      if sFlag == '-n':
         bBinarize = False
      elif sFlag == '-l':
         sLogs = sValue
      elif sFlag == '-u':
         bRemoveUnary = True

   rule = binarize.CBinarizer(args[0], sLogs, bBinarize, bRemoveUnary)
   cf = config.CConfig(args[1])
   # NOTE(review): `fiditer` is not imported in the visible part of this
   # file -- confirm it is in scope before this call.
   fiditer.sentence_iterator(rule.process, cf.directory, cf.range)
Пример #3
0
        return node
    else:
        node.type = "token"
        sToken, nIndex = __find_token(sSentence, nIndex)
        node.token = sToken
        assert sSentence[nIndex] == ")"
        node.end_index = nIndex
        return node


#================================================================


def fromString(sSentence):
    """Build a node from the bracketed sentence text in sSentence.

    Returns the literal string "Empty sentence" when sSentence is empty or
    consists only of whitespace. Otherwise returns the result of
    __find_node started at index 0 of sSentence.
    """
    has_content = len(sSentence.strip()) > 0
    if has_content:
        return __find_node(sSentence, 0)
    return "Empty sentence"


#================================================================

if __name__ == '__main__':
    # Self-test driver: passes a printing callback to
    # fiditer.sentence_iterator along with the directory and range read
    # from "fid.config".
    import fiditer
    import config

    def printx(x):
        # Convert one sentence string with fromString and print the result.
        print(fromString(x))

    cf = config.CConfig("fid.config")
    fiditer.sentence_iterator(printx, cf.directory, cf.range)
Пример #4
0
         nIndex = child.end_index
         nIndex = __next_index(sSentence, nIndex+1)
      node.children = lChildren
      assert sSentence[nIndex] == ")"
      node.end_index = nIndex
      return node
   else:
      node.type = "token"
      sToken, nIndex = __find_token(sSentence, nIndex)
      node.token = sToken
      assert sSentence[nIndex]==")"
      node.end_index = nIndex
      return node

#================================================================

def parse_object(sSentence):
   """Parse the bracketed sentence text in sSentence into a node.

   Returns the literal string "Empty sentence" when sSentence is empty or
   whitespace only. Otherwise returns whatever __find_node produces when
   started at index 0 of sSentence.
   """
   if not sSentence.strip():
      return "Empty sentence"
   return __find_node(sSentence, 0)

#================================================================

if __name__ == '__main__':
   # Self-test driver: passes a printing callback to
   # fiditer.sentence_iterator along with the directory and range read
   # from "fid.config".
   import fiditer
   import config

   def printx(x):
      # Parse one sentence string with parse_object and print the result.
      print(parse_object(x))

   cf = config.CConfig("fid.config")
   fiditer.sentence_iterator(printx, cf.directory, cf.range)