Exemplos de preserving_split em Python, exemplos de munge.lex.lex.preserving_split em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: parse.py Projeto: Oneplus/cnccgbank

def parse_tree(tree_string, node_factory=CCGNodeFactory):
    '''Reads a derivation out of _tree_string_ with a CCGParser built around _node_factory_.

    The string is tokenised by preserving_split using "()<>" as the split
    characters and '<>' as the suppressors. Once read_paren has returned,
    ensure_stream_exhausted is invoked on the same token stream.'''
    reader = CCGParser(node_factory)
    token_stream = preserving_split(tree_string, "()<>", suppressors='<>')

    derivation = reader.read_paren(token_stream)
    ensure_stream_exhausted(token_stream, 'ccg.parse_tree')
    return derivation

Exemplo n.º 2

0

Exibir arquivo

def parse_tree(tree_string, node_factory=CCGNodeFactory):
    '''Reads a derivation out of _tree_string_ with a CCGParser built around _node_factory_.

    The string is tokenised by preserving_split using "()<>" as the split
    characters and '<>' as the suppressors. Once read_paren has returned,
    ensure_stream_exhausted is invoked on the same token stream.'''
    reader = CCGParser(node_factory)
    token_stream = preserving_split(tree_string, "()<>", suppressors='<>')

    derivation = reader.read_paren(token_stream)
    ensure_stream_exhausted(token_stream, 'ccg.parse_tree')
    return derivation

Exemplo n.º 3

0

Exibir arquivo

Arquivo: parse.py Projeto: Oneplus/cnccgbank

def parse_category(cat_string):
    '''Builds a category object from the category string _cat_string_.

    Throws DocParseException when tokens are left over after parsing.'''
    # Every character in the split set is handed back as a token of its own,
    # so none of them should appear inside an atomic category label.
    # ComplexCategory.mode_symbols is currently not appended to the split set.
    token_stream = preserving_split(cat_string, "(\\/|)[]")

    category = parse_compound(token_stream)
    ensure_stream_exhausted(token_stream, 'cats.parse_category')
    return category

Exemplo n.º 4

0

Exibir arquivo

def parse_category(cat_string):
    '''Builds a category object from the category string _cat_string_.

    Throws DocParseException when tokens are left over after parsing.'''
    # Every character in the split set is handed back as a token of its own,
    # so none of them should appear inside an atomic category label.
    # ComplexCategory.mode_symbols is currently not appended to the split set.
    token_stream = preserving_split(cat_string, "(\\/|)[]")

    category = parse_compound(token_stream)
    ensure_stream_exhausted(token_stream, 'cats.parse_category')
    return category

Exemplo n.º 5

0

Exibir arquivo

Arquivo: parse.py Projeto: Oneplus/cnccgbank

def parse_category(cat_string):
    '''Parses _cat_string_ into a category, attaching an alias when a '~' token follows it.

    parse_compound is given the token stream and a fresh dict. After any alias
    has been read, ensure_stream_exhausted is invoked on the token stream.'''
    # Every character in the split set is handed back as a token of its own,
    # so none of them should appear inside an atomic category label.
    # ComplexCategory.mode_symbols is currently not appended to the split set.
    token_stream = preserving_split(cat_string, "(\\/)[]{}~")

    category = parse_compound(token_stream, {})
    upcoming = token_stream.peek()
    if upcoming == '~':
        category.alias = parse_alias(token_stream)

    ensure_stream_exhausted(token_stream, 'cats.parse_category')
    return category

Exemplo n.º 6

0

Exibir arquivo

def parse_category(cat_string):
    '''Parses _cat_string_ into a category, attaching an alias when a '~' token follows it.

    parse_compound is given the token stream and a fresh dict. After any alias
    has been read, ensure_stream_exhausted is invoked on the token stream.'''
    # Every character in the split set is handed back as a token of its own,
    # so none of them should appear inside an atomic category label.
    # ComplexCategory.mode_symbols is currently not appended to the split set.
    token_stream = preserving_split(cat_string, "(\\/)[]{}~")

    category = parse_compound(token_stream, {})
    upcoming = token_stream.peek()
    if upcoming == '~':
        category.alias = parse_alias(token_stream)

    ensure_stream_exhausted(token_stream, 'cats.parse_category')
    return category

Exemplo n.º 7

0

Exibir arquivo

Arquivo: lex_tests.py Projeto: Oneplus/cnccgbank

    def testPennSplit(self):
        s = ''' 
( (S 
    (NP-SBJ (NNP Mr.) (NNP Vinken) )
    (VP (VBZ is) 
      (NP-PRD 
        (NP (NN chairman) )
        (PP (IN of) 
          (NP 
            (NP (NNP Elsevier) (NNP N.V.) )
            (, ,) 
            (NP (DT the) (NNP Dutch) (VBG publishing) (NN group) )))))
    (. .) ))'''
        
        result = [tok for tok in preserving_split(s, r'()')]
        self.assertEqual(result, '''( ( S ( NP-SBJ ( NNP Mr. ) ( NNP Vinken ) ) ( VP ( VBZ is ) ( NP-PRD ( NP ( NN chairman ) ) ( PP ( IN of ) ( NP ( NP ( NNP Elsevier ) ( NNP N.V. ) ) ( , , ) ( NP ( DT the ) ( NNP Dutch ) ( VBG publishing ) ( NN group ) ) ) ) ) ) ( . . ) ) )'''.split(" "))

Exemplo n.º 8

0

Exibir arquivo

    def testPennSplit(self):
        s = ''' 
( (S 
    (NP-SBJ (NNP Mr.) (NNP Vinken) )
    (VP (VBZ is) 
      (NP-PRD 
        (NP (NN chairman) )
        (PP (IN of) 
          (NP 
            (NP (NNP Elsevier) (NNP N.V.) )
            (, ,) 
            (NP (DT the) (NNP Dutch) (VBG publishing) (NN group) )))))
    (. .) ))'''

        result = [tok for tok in preserving_split(s, r'()')]
        self.assertEqual(
            result,
            '''( ( S ( NP-SBJ ( NNP Mr. ) ( NNP Vinken ) ) ( VP ( VBZ is ) ( NP-PRD ( NP ( NN chairman ) ) ( PP ( IN of ) ( NP ( NP ( NNP Elsevier ) ( NNP N.V. ) ) ( , , ) ( NP ( DT the ) ( NNP Dutch ) ( VBG publishing ) ( NN group ) ) ) ) ) ) ( . . ) ) )'''
            .split(" "))

Exemplo n.º 9

0

Exibir arquivo

 def testOnlySplitOnWhitespace(self):
     '''When none of the split characters occur in the input, tokens break only at the space.'''
     markup = r'<a href="index.html">Text</a>'
     tokens = list(preserving_split(markup, r'@#$%'))
     expected = markup.split(" ")
     self.assertEqual(tokens, expected)

Exemplo n.º 10

0

Exibir arquivo

 def testPreserves(self):
     '''Each split character found in the input is kept as a token of its own.'''
     markup = r'<a href="index.html">Text</a>'
     expected = r'< a href = " index.html " > Text < / a >'.split(" ")
     tokens = list(preserving_split(markup, r'<>="/'))
     self.assertEqual(tokens, expected)

Exemplo n.º 11

0

Exibir arquivo

 def testEmptyPeek(self):
     '''On empty input, next() raises StopIteration and peek() returns a falsy value.'''
     stream = preserving_split('', '@#$')
     self.assertRaises(StopIteration, stream.next)
     # assertFalse replaces the deprecated failIf alias, which was removed
     # from unittest in Python 3.12.
     self.assertFalse(stream.peek())  # peek must yield None

Exemplo n.º 12

0

Exibir arquivo

 def testPeek(self):
     '''peek() reports each upcoming token before next() moves past it.'''
     token_stream = preserving_split('abc/def.ghi', './')
     upcoming = ('abc', '/', 'def', '.', 'ghi')
     for wanted in upcoming:
         self.assertEqual(token_stream.peek(), wanted)
         token_stream.next()

Exemplo n.º 13

0

Exibir arquivo

Arquivo: lex_tests.py Projeto: Oneplus/cnccgbank

 def testEmptyPeek(self):
     '''On empty input, next() raises StopIteration and peek() returns a falsy value.'''
     stream = preserving_split('', '@#$')
     self.assertRaises(StopIteration, stream.next)
     # assertFalse replaces the deprecated failIf alias, which was removed
     # from unittest in Python 3.12.
     self.assertFalse(stream.peek())  # peek must yield None

Exemplo n.º 14

0

Exibir arquivo

Arquivo: lex_tests.py Projeto: Oneplus/cnccgbank

 def testPeek(self):
     '''peek() reports each upcoming token before next() moves past it.'''
     token_stream = preserving_split('abc/def.ghi', './')
     upcoming = ('abc', '/', 'def', '.', 'ghi')
     for wanted in upcoming:
         self.assertEqual(token_stream.peek(), wanted)
         token_stream.next()

Exemplo n.º 15

0

Exibir arquivo

Arquivo: lex_tests.py Projeto: Oneplus/cnccgbank

 def testEmptyInput(self):
     '''Splitting the empty string with an empty split set produces no tokens.'''
     result = list(preserving_split('', ''))
     # assertFalse replaces the deprecated failIf alias, which was removed
     # from unittest in Python 3.12.
     self.assertFalse(result)

Exemplo n.º 16

0

Exibir arquivo

Arquivo: lex_tests.py Projeto: Oneplus/cnccgbank

 def testSplitOnNothing(self):
     '''With empty split and skip character sets the whole input comes back as one token.'''
     markup = r'<a href="index.html">Text</a>'
     tokens = list(preserving_split(markup, '', skip_chars=''))
     self.assertEqual(len(tokens), 1)
     self.assertEqual(tokens[0], markup)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: lex_tests.py Projeto: Oneplus/cnccgbank

 def testOnlySplitOnWhitespace(self):
     '''When none of the split characters occur in the input, tokens break only at the space.'''
     markup = r'<a href="index.html">Text</a>'
     tokens = list(preserving_split(markup, r'@#$%'))
     expected = markup.split(" ")
     self.assertEqual(tokens, expected)

Exemplo n.º 18

0

Exibir arquivo

 def testSplitOnNothing(self):
     '''With empty split and skip character sets the whole input comes back as one token.'''
     markup = r'<a href="index.html">Text</a>'
     tokens = list(preserving_split(markup, '', skip_chars=''))
     self.assertEqual(len(tokens), 1)
     self.assertEqual(tokens[0], markup)

Exemplo n.º 19

0

Exibir arquivo

 def tokenise(self, tree_string, split_chars, suppressors):
     '''Tokenises _tree_string_ with preserving_split, skipping spaces and newlines only.

     The split_chars and suppressors arguments are accepted but not forwarded:
     the call always passes empty split and suppressor sets.
     NOTE(review): presumably intentional for this tokeniser -- confirm.'''
     token_stream = preserving_split(
         tree_string,
         split_chars='',
         skip_chars=' \n',
         suppressors='',
     )
     return token_stream

Exemplo n.º 20

0

Exibir arquivo

 def testEmptyInput(self):
     '''Splitting the empty string with an empty split set produces no tokens.'''
     result = list(preserving_split('', ''))
     # assertFalse replaces the deprecated failIf alias, which was removed
     # from unittest in Python 3.12.
     self.assertFalse(result)

Exemplo n.º 21

0

Exibir arquivo

 def testAdjacentSplitters(self):
     '''Back-to-back split characters each come out as a separate token.'''
     expected = r'a . b . cd . ef . . g'.split(" ")
     tokens = list(preserving_split(r'a.b.cd.ef..g', '.'))
     self.assertEqual(tokens, expected)

Exemplo n.º 22

0

Exibir arquivo

Arquivo: lex_tests.py Projeto: Oneplus/cnccgbank

 def testAdjacentSplitters(self):
     '''Back-to-back split characters each come out as a separate token.'''
     expected = r'a . b . cd . ef . . g'.split(" ")
     tokens = list(preserving_split(r'a.b.cd.ef..g', '.'))
     self.assertEqual(tokens, expected)

Exemplo n.º 23

0

Exibir arquivo

Arquivo: lex_tests.py Projeto: Oneplus/cnccgbank

 def testPreserves(self):
     '''Each split character found in the input is kept as a token of its own.'''
     markup = r'<a href="index.html">Text</a>'
     expected = r'< a href = " index.html " > Text < / a >'.split(" ")
     tokens = list(preserving_split(markup, r'<>="/'))
     self.assertEqual(tokens, expected)