示例#1
0
def test_parse_should_return_a_list_with_the_blanks_contents():
    """parse() should return the blank contents of a text matching the template."""
    templater = Templater()
    for sample in ('a b d', 'a e d'):
        templater.learn(sample)
    blanks = templater.parse('a b c d')
    assert blanks == ['', 'b c', '']
示例#2
0
def test_should_be_able_to_adjust_minimum_size_of_a_block():
    """With min_block_size=2 the learned template should only keep ' and '."""
    templater = Templater(min_block_size=2)
    for sample in ('git and pyth', 'eggs and spam'):
        templater.learn(sample)
    assert [None, ' and ', None] == templater._template
示例#3
0
def test_should_be_able_to_adjust_minimum_size_of_a_block():
    """With min_block_size=2 the learned template should only keep ' and '."""
    templater = Templater(min_block_size=2)
    for sample in ('git and pyth', 'eggs and spam'):
        templater.learn(sample)
    assert [None, ' and ', None] == templater._template
示例#4
0
def test_parse_should_return_a_list_with_the_blanks_contents():
    """parse() should return the blank contents of a text matching the template."""
    templater = Templater()
    for sample in ('a b d', 'a e d'):
        templater.learn(sample)
    blanks = templater.parse('a b c d')
    assert blanks == ['', 'b c', '']
示例#5
0
def test_join_should_fill_the_blanks_with_elements_received():
    """join() should rebuild a text from the blanks that parse() extracted."""
    templater = Templater()
    for sample in ('a b d', 'a e d'):
        templater.learn(sample)
    blanks = templater.parse('a b c d')
    rebuilt = templater.join(blanks)
    assert rebuilt == 'a b c d'
示例#6
0
def test_join_should_fill_the_blanks_with_elements_received():
    """join() should rebuild a text from the blanks that parse() extracted."""
    templater = Templater()
    for sample in ('a b d', 'a e d'):
        templater.learn(sample)
    blanks = templater.parse('a b c d')
    rebuilt = templater.join(blanks)
    assert rebuilt == 'a b c d'
示例#7
0
def test_new_learn_text_trying_to_delete_some_variable():
    """Learning a text whose last blank is empty should keep every blank."""
    templater = Templater()
    samples = ('<b> a and b </b>', '<b> c and d </b>', '<b> e and  </b>')
    for sample in samples:
        templater.learn(sample)
    learned = templater._template
    assert learned == [None, '<b> ', None, ' and ', None, ' </b>', None]
示例#8
0
def test_new_learn_text_trying_to_delete_some_variable():
    """Learning a text whose last blank is empty should keep every blank."""
    templater = Templater()
    samples = ('<b> a and b </b>', '<b> c and d </b>', '<b> e and  </b>')
    for sample in samples:
        templater.learn(sample)
    learned = templater._template
    assert learned == [None, '<b> ', None, ' and ', None, ' </b>', None]
示例#9
0
def test_template_with_named_markers_should_not_be_able_to_learn():
    """learn() should raise NotImplementedError on a named-marker template."""
    t = Templater(template='{{one}}<u>{{two}}</u>{{three}}',
                  marker=regexp_marker)
    try:
        t.learn('a<u>b</u>c')
    except NotImplementedError:
        pass
    else:
        # Show the resulting template to help diagnose the unexpected success.
        print(t._template)
        # Raise explicitly instead of asserting a string against False: the
        # failure then carries a readable message and survives `python -O`.
        raise AssertionError('NotImplementedError not raised')
示例#10
0
def test_join_with_less_parameters_than_variables_should_raise_AttributeError():
    """join() should raise AttributeError when given too few blank values."""
    template = Templater()
    template.learn('a b d')
    template.learn('a e d')
    try:
        # Only one value is supplied; the return value is irrelevant here.
        template.join([''])
    except AttributeError:
        pass
    else:
        # Raise explicitly instead of asserting a string against False: the
        # failure then carries a readable message and survives `python -O`.
        raise AssertionError('AttributeError not raised!')
示例#11
0
def test_template_with_named_markers_should_not_be_able_to_learn():
    """learn() should raise NotImplementedError on a named-marker template."""
    t = Templater(template='{{one}}<u>{{two}}</u>{{three}}',
                  marker=regexp_marker)
    try:
        t.learn('a<u>b</u>c')
    except NotImplementedError:
        pass
    else:
        # Show the resulting template to help diagnose the unexpected success.
        # Function-call form of print so the test also parses on Python 3.
        print(t._template)
        # Raise explicitly instead of asserting a string against False: the
        # failure then carries a readable message and survives `python -O`.
        raise AssertionError('NotImplementedError not raised')
示例#12
0
    def find_splits(self, column: List[Cell]) -> Iterable[CompoundSplit]:
        """Yield sub-columns obtained by learning a text template over a column.

        The text of every cell is fed to a ``Templater``. If the learned
        template is non-trivial, each cell text is parsed against it and the
        stripped blank contents are transposed into candidate columns. Every
        candidate accepted by ``self.col_is_ok`` is yielded.

        Args:
            column: Cells of one column; each cell's ``"text"`` entry is used,
                with a missing entry treated as the empty string.

        Yields:
            ``CompoundSplit(name, "string", cells)`` for each accepted
            candidate column, where ``cells`` is a list of ``{"text": ...}``
            dicts. Learning or parsing failures end the generator and are
            only logged at debug level.
        """
        from templater import Templater

        texts = [c.get("text", "") for c in column]
        template = Templater(min_block_size=self.min_block_size)
        for cell in texts:
            try:
                template.learn(cell)
            except Exception:
                # Best effort: give up on this column, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare ``except`` would.
                log.debug(f"Failed to add {cell} to template")
                return

        if any(template._template):
            log.debug(f"Template found: {template._template}")
            try:
                newrows = []
                for cell in texts:
                    newrows.append(map(str.strip, template.parse(cell)))
                # Transpose rows of parsed blanks into one tuple per blank.
                newcols = zip(*newrows)
                for i, newcol in enumerate(newcols):
                    if self.col_is_ok(newcol):

                        # NOTE(review): ``i`` counts parsed blanks, whereas
                        # ``template._template`` appears to interleave blanks
                        # (None) with literal blocks, so this lookup may hit
                        # None or an unrelated block -- confirm the intended
                        # index.
                        prefix = template._template[i].strip()
                        if prefix:
                            if prefix.isnumeric():  # TODO: check numeric suffix
                                # A numeric literal is treated as part of the
                                # value; the column is then named by position.
                                newcol = tuple(prefix + c for c in newcol)
                                prefix = str(i)
                        else:
                            prefix = str(i)

                        yield CompoundSplit(
                            prefix, "string", [{"text": c} for c in newcol]
                        )
            except Exception:
                log.debug(f"Failed to parse {cell} using template {template._template}")
示例#13
0
#!/usr/bin/env python
# coding: utf-8

from time import time
from glob import glob
from templater import Templater


# Learn one template from every HTML file, report how long learning took,
# then parse each file with the learned template.
files = glob('html/*.html') # You must have some .html files in html/
template = Templater()
print('Time to learn')
start = time()
for filename in files:
    # Stay on the same line (and flush) so the "OK" below is appended to it.
    print('  Learning "%s"...' % filename, end=' ', flush=True)
    with open(filename) as fp:
        template.learn(fp.read())
    print('OK')
end = time()
print(' Time:', end - start)

print('Template created:')
print(template._template)

print('Now, work!')
# NOTE(review): this second timer is started but never reported.
start = time()
for filename in files:
    print('  Parsing "%s"...' % filename)
    with open(filename) as fp:
        print('  Result:', template.parse(fp.read()))
示例#14
0
#!/usr/bin/env python
# coding: utf-8

from templater import Templater


# Show how min_block_size changes the template learned from two sentences.
str_1 = 'my favorite color is blue'
str_2 = 'my favorite color is violet'
print('Learning from:')
print('  ', str_1)
print('  ', str_2)

t = Templater() # default min_block_size = 1
t.learn(str_1)
t.learn(str_2)
print('Template for min_block_size=1 (default):')
print('  ', t._template)

t = Templater(min_block_size=2)
t.learn(str_1)
t.learn(str_2)
print('Template for min_block_size=2:')
print('  ', t._template)
示例#15
0
#!/usr/bin/env python
# coding: utf-8

from os import unlink
from templater import Templater


# Learn a template, persist it in two formats, and parse with each copy.
text_file = 'my-little-template.html'
pickle_file = 'my-template.tpl'

learned = Templater()
for sample in ('<b>spam</b>', '<b>eggs</b>', '<b>ham</b>'):
    learned.learn(sample)

# The text file holds the template string with its blanks filled by '|||';
# save() will put a `\n` in the EOF.
learned.save(text_file, marker='|||')
# The .tpl file holds the pickle of the Templater object.
learned.dump(pickle_file)
print(learned.parse('<b>parsing using first template object</b>'))

# open() removes `\n`/`\r\n` in the end of file before creating the template
# definition.
from_text = Templater.open(text_file, marker='|||')
print(from_text.parse('<b>parsing using second template object</b>'))

from_pickle = Templater.load(pickle_file)
print(from_pickle.parse('<b>parsing using third template object</b>'))

# Removing files:
for path in (text_file, pickle_file):
    unlink(path)
示例#16
0
#!/usr/bin/env python
# coding: utf-8

from os import unlink
from templater import Templater


# Learn a template, persist it in two formats, and parse with each copy.
t = Templater()
t.learn('<b>spam</b>')
t.learn('<b>eggs</b>')
t.learn('<b>ham</b>')
t.save('my-little-template.html', marker='|||') # will put a `\n` in the EOF
t.dump('my-template.tpl')
print(t.parse('<b>parsing using first template object</b>'))

t2 = Templater.open('my-little-template.html', marker='|||')
# it removes `\n`/`\r\n` in the end of file before creating template definition
print(t2.parse('<b>parsing using second template object</b>'))

t3 = Templater.load('my-template.tpl')
print(t3.parse('<b>parsing using third template object</b>'))

# 'my-little-template.html' will have the template string with blanks filled by
# '|||'
# 'my-template.tpl' will have the pickle of Templater object

# Removing files:
unlink('my-little-template.html')
unlink('my-template.tpl')
示例#17
0
#!/usr/bin/env python
# coding: utf-8

from time import time
from glob import glob
from templater import Templater


# Learn one template from every HTML file, report how long learning took,
# then parse each file with the learned template.
files = glob('html/*.html') # You must have some .html files in html/
template = Templater()
print('Time to learn')
start = time()
for filename in files:
    # Stay on the same line (and flush) so the "OK" below is appended to it,
    # as the trailing comma of the old print statement did.
    print('  Learning "%s"...' % filename, end=' ', flush=True)
    with open(filename) as fp:
        template.learn(fp.read())
    print('OK')
end = time()
print(' Time:', end - start)

print('Template created:')
print(template._template)

print('Now, work!')
# NOTE(review): this second timer is started but never reported.
start = time()
for filename in files:
    print('  Parsing "%s"...' % filename)
    with open(filename) as fp:
        print('  Result:', template.parse(fp.read()))
示例#18
0
#!/usr/bin/env python
# coding: utf-8

from templater import Templater

# Learn a template from three samples, then parse the last sample with it
# and fill the template's blanks with new values.
samples = [
    '<b> spam and eggs </b>',
    '<b> ham and spam </b>',
    '<b> white and black </b>',
]
target = samples[-1]
templater = Templater()
for sample in samples:
    print('Learning "%s"...' % sample)
    templater.learn(sample)
print('Template created:', templater._template)
print('Parsing text "%s"...' % target)
parsed = templater.parse(target)
print('  Result:', parsed)
filled = templater.join(['', 'yellow', 'blue', ''])
print('Filling the blanks:', filled)
示例#19
0
#!/usr/bin/env python
# coding: utf-8

from templater import Templater


# Learn a template from three samples, then parse the last sample with it
# and fill the template's blanks with new values.
texts_to_learn = ['<b> spam and eggs </b>', '<b> ham and spam </b>',
                  '<b> white and black </b>']
text_to_parse = texts_to_learn[-1]
template = Templater()
for text in texts_to_learn:
    print('Learning "%s"...' % text)
    template.learn(text)
print('Template created:', template._template)
print('Parsing text "%s"...' % text_to_parse)
print('  Result:', template.parse(text_to_parse))
print('Filling the blanks:', template.join(['', 'yellow', 'blue', '']))