Exemplo n.º 1
0
def parse_tree(tree_string, node_factory=CCGNodeFactory):
    """Parse ``tree_string`` into a derivation and return it.

    The string is tokenised with ``preserving_split`` using ``"()<>"`` as
    the split characters and ``'<>'`` as suppressors, and the resulting
    token stream is handed to ``CCGParser.read_paren``.

    ``node_factory`` is passed straight to the ``CCGParser`` constructor.
    """
    tree_parser = CCGParser(node_factory)
    tokens = preserving_split(tree_string, "()<>", suppressors='<>')

    derivation = tree_parser.read_paren(tokens)
    # Presumably rejects input with tokens left over after the tree has
    # been read -- confirm against ensure_stream_exhausted.
    ensure_stream_exhausted(tokens, 'ccg.parse_tree')
    return derivation
Exemplo n.º 2
0
def parse_tree(tree_string, node_factory=CCGNodeFactory):
    """Read a derivation out of ``tree_string``.

    A ``CCGParser`` built around ``node_factory`` consumes the tokens that
    ``preserving_split`` produces for the string (split characters
    ``"()<>"``, suppressors ``'<>'``). The value returned by
    ``read_paren`` is the result of this function.
    """
    ccg_parser = CCGParser(node_factory)
    token_stream = preserving_split(tree_string, "()<>", suppressors='<>')

    parsed = ccg_parser.read_paren(token_stream)
    # Checked after parsing; presumably flags trailing, unconsumed
    # tokens -- verify against ensure_stream_exhausted.
    ensure_stream_exhausted(token_stream, 'ccg.parse_tree')
    return parsed
Exemplo n.º 3
0
def parse_category(cat_string):
    """Parse a category string into a category object.

    Raises DocParseException if unconsumed tokens remain once the
    category has been read.
    """
    # Atomic category labels should avoid mode symbols. Adding
    # ComplexCategory.mode_symbols to the split characters is currently
    # disabled, so only the characters listed here become tokens.
    tokens = preserving_split(cat_string, "(\\/|)[]")

    category = parse_compound(tokens)
    ensure_stream_exhausted(tokens, 'cats.parse_category')
    return category
Exemplo n.º 4
0
def parse_category(cat_string):
    """Turn ``cat_string`` into a category object and return it.

    Raises DocParseException if tokens are left unconsumed after the
    category has been parsed.
    """
    # Keep mode symbols out of atomic category labels. Splitting on
    # ComplexCategory.mode_symbols as well is currently switched off.
    split_chars = "(\\/|)[]"
    token_stream = preserving_split(cat_string, split_chars)

    parsed = parse_compound(token_stream)
    ensure_stream_exhausted(token_stream, 'cats.parse_category')
    return parsed
Exemplo n.º 5
0
def parse_category(cat_string):
    """Parse ``cat_string`` into a category, with an optional alias.

    The string is tokenised on the characters ``(\\/)[]{}~``. After the
    compound category has been read, a following ``~`` token causes
    ``parse_alias`` to be called and its result stored on the category's
    ``alias`` attribute.
    """
    # Atomic category labels should avoid mode symbols. Adding
    # ComplexCategory.mode_symbols to the split characters is currently
    # disabled.
    tokens = preserving_split(cat_string, "(\\/)[]{}~")

    # The second argument starts out as a fresh, empty dict on each call.
    category = parse_compound(tokens, {})

    alias_follows = tokens.peek() == '~'
    if alias_follows:
        category.alias = parse_alias(tokens)

    # Presumably rejects trailing, unconsumed tokens -- confirm against
    # ensure_stream_exhausted.
    ensure_stream_exhausted(tokens, 'cats.parse_category')
    return category
Exemplo n.º 6
0
def parse_category(cat_string):
    """Build a category object from ``cat_string``.

    Tokens come from ``preserving_split`` with ``(\\/)[]{}~`` as split
    characters. When the token after the compound category is ``~``, the
    result of ``parse_alias`` is assigned to the category's ``alias``.
    """
    # Keep mode symbols out of atomic category labels. Splitting on
    # ComplexCategory.mode_symbols as well is currently switched off.
    split_chars = "(\\/)[]{}~"
    token_stream = preserving_split(cat_string, split_chars)

    # parse_compound receives a new empty dict for every call.
    parsed = parse_compound(token_stream, {})

    next_token = token_stream.peek()
    if next_token == '~':
        parsed.alias = parse_alias(token_stream)

    # Presumably flags leftover tokens -- verify against
    # ensure_stream_exhausted.
    ensure_stream_exhausted(token_stream, 'cats.parse_category')
    return parsed
Exemplo n.º 7
0
    def testPennSplit(self):
        s = ''' 
( (S 
    (NP-SBJ (NNP Mr.) (NNP Vinken) )
    (VP (VBZ is) 
      (NP-PRD 
        (NP (NN chairman) )
        (PP (IN of) 
          (NP 
            (NP (NNP Elsevier) (NNP N.V.) )
            (, ,) 
            (NP (DT the) (NNP Dutch) (VBG publishing) (NN group) )))))
    (. .) ))'''
        
        result = [tok for tok in preserving_split(s, r'()')]
        self.assertEqual(result, '''( ( S ( NP-SBJ ( NNP Mr. ) ( NNP Vinken ) ) ( VP ( VBZ is ) ( NP-PRD ( NP ( NN chairman ) ) ( PP ( IN of ) ( NP ( NP ( NNP Elsevier ) ( NNP N.V. ) ) ( , , ) ( NP ( DT the ) ( NNP Dutch ) ( VBG publishing ) ( NN group ) ) ) ) ) ) ( . . ) ) )'''.split(" "))
Exemplo n.º 8
0
    def testPennSplit(self):
        s = ''' 
( (S 
    (NP-SBJ (NNP Mr.) (NNP Vinken) )
    (VP (VBZ is) 
      (NP-PRD 
        (NP (NN chairman) )
        (PP (IN of) 
          (NP 
            (NP (NNP Elsevier) (NNP N.V.) )
            (, ,) 
            (NP (DT the) (NNP Dutch) (VBG publishing) (NN group) )))))
    (. .) ))'''

        result = [tok for tok in preserving_split(s, r'()')]
        self.assertEqual(
            result,
            '''( ( S ( NP-SBJ ( NNP Mr. ) ( NNP Vinken ) ) ( VP ( VBZ is ) ( NP-PRD ( NP ( NN chairman ) ) ( PP ( IN of ) ( NP ( NP ( NNP Elsevier ) ( NNP N.V. ) ) ( , , ) ( NP ( DT the ) ( NNP Dutch ) ( VBG publishing ) ( NN group ) ) ) ) ) ) ( . . ) ) )'''
            .split(" "))
Exemplo n.º 9
0
 def testOnlySplitOnWhitespace(self):
     """When none of the split characters occur in the input, the tokens
     match a plain split on single spaces."""
     markup = r'<a href="index.html">Text</a>'
     tokens = list(preserving_split(markup, r'@#$%'))
     expected = markup.split(" ")
     self.assertEqual(tokens, expected)
Exemplo n.º 10
0
 def testPreserves(self):
     """Split characters are kept in the output as tokens of their own."""
     markup = r'<a href="index.html">Text</a>'
     tokens = list(preserving_split(markup, r'<>="/'))
     # Expected tokens, written space-separated for readability.
     expected = r'< a href = " index.html " > Text < / a >'.split(" ")
     self.assertEqual(tokens, expected)
Exemplo n.º 11
0
 def testEmptyPeek(self):
     """A stream over empty input is exhausted at once and peeks falsy."""
     stream = preserving_split('', '@#$')
     # Advancing a stream that holds no tokens must raise StopIteration.
     self.assertRaises(StopIteration, stream.next)
     # peek must yield None (only falsiness is asserted here).
     # assertFalse replaces the deprecated failIf alias.
     self.assertFalse(stream.peek())
Exemplo n.º 12
0
 def testPeek(self):
     """peek() reports the upcoming token without consuming it."""
     stream = preserving_split('abc/def.ghi', './')
     expected_tokens = ('abc', '/', 'def', '.', 'ghi')
     for expected in expected_tokens:
         # Look first, then advance past the token just inspected.
         self.assertEqual(stream.peek(), expected)
         stream.next()
Exemplo n.º 13
0
 def testEmptyPeek(self):
     """Empty input: next raises StopIteration and peek returns a falsy
     value."""
     stream = preserving_split('', '@#$')
     # With no tokens available, stepping the stream must signal exhaustion.
     self.assertRaises(StopIteration, stream.next)
     # peek must yield None (the check itself only requires a falsy value).
     # failIf is a deprecated unittest alias; assertFalse is its replacement.
     self.assertFalse(stream.peek())
Exemplo n.º 14
0
 def testPeek(self):
     """Each peek() must match the token the following next() consumes."""
     token_stream = preserving_split('abc/def.ghi', './')
     upcoming = ('abc', '/', 'def', '.', 'ghi')
     for want in upcoming:
         # Peek before advancing so the stream position is unchanged.
         self.assertEqual(token_stream.peek(), want)
         token_stream.next()
Exemplo n.º 15
0
 def testEmptyInput(self):
     """Empty input with no split characters produces no tokens."""
     tokens = list(preserving_split('', ''))
     # assertFalse replaces the deprecated failIf alias.
     self.assertFalse(tokens)
Exemplo n.º 16
0
 def testSplitOnNothing(self):
     """With empty split and skip character sets the whole input comes
     back as a single token."""
     markup = r'<a href="index.html">Text</a>'
     tokens = list(preserving_split(markup, '', skip_chars=''))
     # Exactly one token, and it is the unmodified input.
     self.assertEqual(len(tokens), 1)
     self.assertEqual(tokens[0], markup)
Exemplo n.º 17
0
 def testOnlySplitOnWhitespace(self):
     """Split characters absent from the input leave only the default
     splitting, which here matches a split on single spaces."""
     html = r'<a href="index.html">Text</a>'
     actual = list(preserving_split(html, r'@#$%'))
     self.assertEqual(actual, html.split(" "))
Exemplo n.º 18
0
 def testSplitOnNothing(self):
     """No split characters and no skip characters: the input is
     returned whole as the only token."""
     html = r'<a href="index.html">Text</a>'
     pieces = list(preserving_split(html, '', skip_chars=''))
     # First check the count, then the content of the sole token.
     self.assertEqual(len(pieces), 1)
     self.assertEqual(pieces[0], html)
Exemplo n.º 19
0
 def tokenise(self, tree_string, split_chars, suppressors):
     """Tokenise ``tree_string`` by skipping spaces and newlines only.

     NOTE(review): the ``split_chars`` and ``suppressors`` arguments are
     accepted but not used; empty strings are passed to
     ``preserving_split`` instead. Presumably a deliberate override --
     confirm against the caller.
     """
     fixed_options = dict(split_chars='', skip_chars=' \n', suppressors='')
     return preserving_split(tree_string, **fixed_options)
Exemplo n.º 20
0
 def testEmptyInput(self):
     """Splitting an empty string with no split characters gives an
     empty token sequence."""
     result = list(preserving_split('', ''))
     # failIf is a deprecated unittest alias; assertFalse is its replacement.
     self.assertFalse(result)
Exemplo n.º 21
0
 def testAdjacentSplitters(self):
     """Two split characters in a row each become their own token, with
     no empty token between them."""
     tokens = list(preserving_split(r'a.b.cd.ef..g', '.'))
     expected = r'a . b . cd . ef . . g'.split(" ")
     self.assertEqual(tokens, expected)
Exemplo n.º 22
0
 def testAdjacentSplitters(self):
     """Consecutive split characters are emitted as separate tokens."""
     dotted = r'a.b.cd.ef..g'
     actual = list(preserving_split(dotted, '.'))
     # Expected tokens, written space-separated; note the two adjacent dots.
     wanted = r'a . b . cd . ef . . g'.split(" ")
     self.assertEqual(actual, wanted)
Exemplo n.º 23
0
 def testPreserves(self):
     """Every split character found in the input appears in the output
     as a token of its own."""
     html = r'<a href="index.html">Text</a>'
     actual = list(preserving_split(html, r'<>="/'))
     wanted = r'< a href = " index.html " > Text < / a >'.split(" ")
     self.assertEqual(actual, wanted)