Пример #1
0
def test_external_db_error_when_migrations_not_applied(mocker, db):
    """Check the ``status`` output when an external pipe database lacks migrations.

    ``sys.exit`` and the migrations registry are patched, an in-memory
    database is created with tables only, and a second bot defines pipe
    ``p1`` on that external engine before running ``status``.
    """
    mocker.patch('sys.exit')
    mocker.patch('databot.db.migrations.Migrations.migrations', {migrations.ValueToMsgpack: set()})

    # Create tables, but do not apply any migrations
    external_engine = sa.create_engine('sqlite:///:memory:')
    external_models = Models(sa.MetaData())
    external_models.metadata.create_all(external_engine, checkfirst=True)
    external_bot = Bot(external_engine, output=io.StringIO(), models=external_models)
    external_bot.define('p1')

    # Second bot: 'p1' lives in the external database, 'p2' in its own.
    local_bot = db.Bot()
    local_bot.define('p1', external_bot.engine)
    local_bot.define('p2')
    local_bot.main(argv=['status'])

    printed = local_bot.output.output.getvalue()
    assert printed == dedent('''\
        External database 'sqlite:///:memory:' from 'p1' pipe has unapplied migrations.

        You need to run database migrations:

            databot sqlite:///:memory: migrate

        List of unapplied migrations:

          - ValueToMsgpack

           id              rows  source
               errors      left    target
        =================================
            1                 0  p1
        ---------------------------------
            2                 0  p2
        ---------------------------------
    ''')
Пример #2
0
    def html(self, code):
        """Evaluate ``code`` against a bot preloaded with the sample HTML fixture.

        A fresh bot gets two pipes, ``html`` and ``p1``. The ``html`` pipe
        receives one row keyed by ``https://example.com/`` whose value mimics
        a downloaded response carrying the bytes of ``fixtures/sample.html``.

        Args:
            code: Python expression source, evaluated with ``bot``, ``html``,
                ``p1`` (bound as ``p1(html)``), ``this``, ``int`` and
                ``select`` in scope.

        Returns:
            The items of ``p1`` after evaluation, pretty-printed at width 42.
        """
        bot = Bot()
        page_pipe = bot.define('html')
        target_pipe = bot.define('p1')

        fixture = here / 'fixtures/sample.html'
        with fixture.open('rb') as stream:
            body = stream.read()

        response = {
            'headers': {},
            'cookies': {},
            'status_code': 200,
            'encoding': 'utf-8',
            'content': body,
        }
        page_pipe.append([('https://example.com/', response)])

        namespace = {
            'bot': bot,
            'html': page_pipe,
            'p1': target_pipe(page_pipe),
            'this': this,
            'int': int,
            'select': select,
        }
        # NOTE(review): eval executes arbitrary code; only pass trusted snippets.
        eval(code, {}, namespace)

        return pformat(list(target_pipe.items()), width=42)
Пример #3
0
def main():
    """Build and run a Baidu search crawl whose extracted items go through an Insert node.

    The flow loads each search URL, extracts items from that page and from
    the pages returned by ``get_all_page_url``, and passes them to ``Insert``.
    A second pipe calls ``show_info`` on a timer until the first pipe finishes.
    """
    keywords = ['贸易战']
    url_template = 'https://www.baidu.com/s?wd=%s'
    search_urls = [url_template % keyword for keyword in keywords]

    # make data flow net
    # NOTE(review): values are templated straight into the SQL text; confirm
    # that Insert escapes them before feeding it scraped data.
    store_row = Insert(
        "insert into test.baidu (id,name ,url,page_rank,page_no)values('{id}','{name}' ,'{url}',{page_rank},{page_no})",
        **dbconf)

    # Nodes are created in the same order as they appear in the pipe.
    search_loader = HttpLoader()
    first_page_branch = Branch(get_all_items, join=True)
    paged_branch = Branch(
        get_all_page_url,
        HttpLoader(),
        get_all_items,
        share=False,
        join=True,
        route_type=HttpResponse,
    )
    crawl = Pipe(search_urls, search_loader, first_page_branch, paged_branch, store_row)

    progress_timer = Timer(delay=2, until=crawl.finished)
    Pipe(progress_timer, show_info)

    Bot.render('ex_output/baiduspider')
    Bot.run()
Пример #4
0
def test_run_limits_and_fail():
    """A handler error during ``run -l 1,1,0`` surfaces as ExpressionError.

    Only key ``'a'`` reaches ``p2`` before the handler fails on ``'b'``;
    both tasks have been evaluated twice at that point.
    """
    def handler(row):
        # Generator handler: fails on key 'b', otherwise yields the upper-cased key.
        if row.key == 'b':
            raise ValueError('b')
        yield row.key.upper()

    fill_source = task('p1').once().append(['a', 'b', 'c'])
    transform = task('p1', 'p2').call(handler)
    pipeline = {'tasks': [fill_source, transform]}

    bot = Bot()
    source = bot.define('p1')
    target = bot.define('p2')

    with pytest.raises(ExpressionError):
        bot.main(pipeline, ['run', '-l', '1,1,0'])

    assert list(source.keys()) == ['a', 'b', 'c']
    assert list(target.keys()) == ['A']

    tasks = pipeline['tasks']
    assert tasks[0]._evals == 2
    assert tasks[1]._evals == 2
Пример #5
0
def test_run_limits_and_fail_smaller():
    """With ``-f 2`` the run completes and the failing row is kept as an error.

    Keys ``'a'`` and ``'c'`` reach ``p2``, ``'b'`` shows up in the errors of
    ``p2(p1)``, and both tasks end up evaluated three times.
    """
    def handler(row):
        # Generator handler: fails on key 'b', otherwise yields the upper-cased key.
        if row.key == 'b':
            raise ValueError('b')
        yield row.key.upper()

    fill_source = task('p1').once().append(['a', 'b', 'c'])
    transform = task('p1', 'p2').call(handler)
    pipeline = {'tasks': [fill_source, transform]}

    bot = Bot()
    source = bot.define('p1')
    target = bot.define('p2')

    bot.main(pipeline, ['run', '-l', '1,1,0', '-f', '2'])

    assert list(source.keys()) == ['a', 'b', 'c']
    assert list(target.keys()) == ['A', 'C']
    assert list(target(source).errors.keys()) == ['b']

    tasks = pipeline['tasks']
    assert tasks[0]._evals == 3
    assert tasks[1]._evals == 3
Пример #6
0
def test_external_db_error_when_migrations_not_applied(mocker, db):
    """Check the ``status`` output when an external pipe database lacks migrations.

    ``sys.exit`` and the migrations registry are patched, an in-memory
    database is created with tables only, and a second bot defines pipe
    ``p1`` on that external engine before running ``status``.
    """
    mocker.patch('sys.exit')
    pending_migrations = {migrations.ValueToMsgpack: set()}
    mocker.patch('databot.db.migrations.Migrations.migrations', pending_migrations)

    # Create tables, but do not apply any migrations
    external_engine = sa.create_engine('sqlite:///:memory:')
    external_models = Models(sa.MetaData())
    external_models.metadata.create_all(external_engine, checkfirst=True)
    external_bot = Bot(external_engine, output=io.StringIO(), models=external_models)
    external_bot.define('p1')

    # Second bot: 'p1' lives in the external database, 'p2' in its own.
    local_bot = db.Bot()
    local_bot.define('p1', external_bot.engine)
    local_bot.define('p2')
    local_bot.main(argv=['status'])

    printed = local_bot.output.output.getvalue()
    assert printed == dedent('''\
        External database 'sqlite:///:memory:' from 'p1' pipe has unapplied migrations.

        You need to run database migrations:

            databot sqlite:///:memory: migrate

        List of unapplied migrations:

          - ValueToMsgpack

           id              rows  source
               errors      left    target
        =================================
            1                 0  p1
        ---------------------------------
            2                 0  p2
        ---------------------------------
    ''')
Пример #7
0
 def append(self, code):
     """Evaluate ``code`` with a fresh bot and its empty ``p1`` pipe in scope.

     Args:
         code: Python expression source; it sees ``bot`` and ``p1``.

     Returns:
         ``repr`` of the list of items in ``p1`` after evaluation.
     """
     bot = Bot()
     pipe = bot.define('p1')
     namespace = {
         'bot': bot,
         'p1': pipe,
     }
     # NOTE(review): eval executes arbitrary code; only pass trusted snippets.
     eval(code, {}, namespace)
     return repr(list(pipe.items()))
Пример #8
0
def main():
    """Build and run a flow that prints the USD rate from the CoinDesk price API.

    A timer drives the pipe, the API URL is loaded over HTTP, and the
    ``rate_float`` field of the JSON response is passed to ``print``.
    """
    ticker = Timer(delay=2, max_time=5)
    price_api = "http://api.coindesk.com/v1/bpi/currentprice.json"
    loader = HttpLoader()

    Pipe(
        ticker,
        price_api,
        loader,
        lambda r: r.json['bpi']['USD']['rate_float'],
        print,
    )

    Bot.render('ex_output/simple_bitcoin_price')
    Bot.run()
Пример #9
0
def test_run_once():
    """Tasks marked ``once()`` append a single time; the unmarked one appends three times."""
    append_one_once = task('p1').once().append(1)
    append_two_once = task('p1').once().append(2)
    append_three = task('p1').append(3)
    tasks = [append_one_once, append_two_once, append_three]

    bot = Bot()
    pipe = bot.define('p1')

    bot.commands.run(tasks, limits=(1, 1, 0))

    stored_keys = list(pipe.keys())
    assert stored_keys == [1, 2, 3, 3, 3]
Пример #10
0
 def duplicates(self, code):
     """Evaluate ``code`` against a ``p1`` pipe that holds duplicated keys.

     ``p1`` is preloaded with an ``'old'`` and a ``'new'`` value for each of
     the keys 1 and 2.

     Args:
         code: Python expression source; it sees ``bot`` and ``p1``.

     Returns:
         ``repr`` of the list of items in ``p1`` after evaluation.
     """
     bot = Bot()
     pipe = bot.define('p1').append([
         (1, 'old'),
         (1, 'new'),
         (2, 'old'),
         (2, 'new'),
     ])
     namespace = {
         'bot': bot,
         'p1': pipe,
     }
     # NOTE(review): eval executes arbitrary code; only pass trusted snippets.
     eval(code, {}, namespace)
     return repr(list(pipe.items()))
Пример #11
0
def main():
    """Build and run a Baidu search crawl that sends extracted items to a file node.

    Each search URL is loaded; one branch extracts items from that page and
    another follows the URLs from ``get_all_page_url`` and extracts items
    there. Both branches end in the same ``aiofile`` output.
    """
    keywords = ['贸易战', '世界杯']
    url_template = 'https://www.baidu.com/s?wd=%s'
    search_urls = [url_template % keyword for keyword in keywords]

    outputfile = aiofile('ex_output/baidu.txt')

    # Nodes are created in the same order as they appear in the pipe.
    search_loader = HttpLoader()
    first_page_branch = Branch(get_all_items, outputfile)
    paged_branch = Branch(get_all_page_url, HttpLoader(), get_all_items, outputfile)
    Pipe(search_urls, search_loader, first_page_branch, paged_branch)

    # Generate the flow chart
    Bot.render('ex_output/baiduspider')
    Bot.run()
Пример #12
0
def test_run_limits():
    """``run -l 1,1,0`` processes every row; each task ends up evaluated three times."""
    fill_source = task('p1').once().append(['a', 'b', 'c'])
    upper_case = task('p1', 'p2').select(this.key.upper())
    pipeline = {'tasks': [fill_source, upper_case]}

    bot = Bot()
    source = bot.define('p1')
    target = bot.define('p2')

    bot.main(pipeline, ['run', '-l', '1,1,0'])

    assert list(source.keys()) == ['a', 'b', 'c']
    assert list(target.keys()) == ['A', 'B', 'C']

    tasks = pipeline['tasks']
    assert tasks[0]._evals == 3
    assert tasks[1]._evals == 3
Пример #13
0
def test_run():
    """Run a pipeline that declares its own pipes and check the resulting keys."""
    declared_pipes = [
        define('a'),
        define('b'),
    ]
    declared_tasks = [
        task('a').append(['a', 'A', 'b']),
        task('a', 'b').select(this.key.upper()),
        task().compact(),
    ]
    pipeline = {'pipes': declared_pipes, 'tasks': declared_tasks}

    bot = Bot()
    bot.main(pipeline, ['run', '-f'])

    keys_a = list(bot.pipe('a').keys())
    assert keys_a == ['a', 'A', 'b']
    keys_b = list(bot.pipe('b').keys())
    assert keys_b == ['A', 'B']
Пример #14
0
def test_rename(bot):
    """Rename pipe ``p1`` to ``pp`` and check that ``status`` lists ``pp`` and ``p2``."""
    for name in ('p1', 'p2'):
        bot.define(name)
    bot.main(argv=['rename', 'p1', 'pp'])

    # A second bot defines the pipes under their post-rename names.
    bot = Bot('sqlite:///:memory:', output=io.StringIO())
    for name in ('pp', 'p2'):
        bot.define(name)
    bot.main(argv=['status'])

    report = bot.output.output.getvalue()
    expected = (
        '   id              rows  source\n'
        '       errors      left    target\n'
        '=================================\n'
        '    1                 0  pp\n'
        '---------------------------------\n'
        '    2                 0  p2\n'
        '---------------------------------\n'
    )
    assert report == expected
Пример #15
0
def test_run_freq():
    """A task with ``freq(days=3)`` is skipped one day later and runs again after three days."""
    tasks = [
        task('p1').freq(days=3).append(['a']),
        task('p1', 'p2').select(this.key.upper()),
    ]

    bot = Bot()
    source = bot.define('p1')
    target = bot.define('p2')

    def run_at(moment):
        # Execute the task list with the clock frozen at `moment`.
        with freezegun.freeze_time(moment):
            bot.commands.run(tasks)

    run_at('2017-01-01 00:00:00')
    run_at('2017-01-02 00:00:00')

    assert list(source.keys()) == ['a']
    assert list(target.keys()) == ['A']

    run_at('2017-01-04 00:00:00')

    assert list(source.keys()) == ['a', 'a']
    assert list(target.keys()) == ['A', 'A']
Пример #16
0
def test_rename(bot):
    """Rename pipe ``p1`` to ``pp`` and check that ``status`` lists ``pp`` and ``p2``."""
    for name in ('p1', 'p2'):
        bot.define(name)
    bot.main(argv=['rename', 'p1', 'pp'])

    # A second bot defines the pipes under their post-rename names.
    bot = Bot('sqlite:///:memory:', output=io.StringIO())
    for name in ('pp', 'p2'):
        bot.define(name)
    bot.main(argv=['status'])

    report = bot.output.output.getvalue()
    expected = (
        '   id              rows  source\n'
        '       errors      left    target\n'
        '=================================\n'
        '    1                 0  pp\n'
        '---------------------------------\n'
        '    2                 0  p2\n'
        '---------------------------------\n')
    assert report == expected
Пример #17
0
def test_init_path(tmpdir):
    """A plain file path given to ``Bot`` yields a ``sqlite:///<path>`` engine URL."""
    db_path = str(tmpdir.join('data.db'))
    bot = Bot(db_path)
    engine_url = str(bot.engine.url)
    assert engine_url == 'sqlite:///%s' % db_path
Пример #18
0
def test_run_target():
    """Run the pipeline restricted to different pipe names and track what each pipe holds."""
    pipeline = {
        'pipes': [],
        'tasks': [
            task('a').once().append(['a']),
            task('a', 'b').select(this.key.upper()),
            task('b', 'c').select(this.key.lower()),
            task().compact(),
        ],
    }

    bot = Bot()
    for name in ('a', 'b', 'c'):
        bot.define(name)

    def snapshot():
        # Keys currently stored in pipes 'a', 'b' and 'c', in that order.
        return [list(bot.pipe(name).keys()) for name in ('a', 'b', 'c')]

    def run(*targets):
        # Invoke the 'run' command for the given pipe names, always with '-f'.
        bot.main(pipeline, ['run', *targets, '-f'])

    run('a')
    assert snapshot() == [['a'], [], []]

    run('b')
    assert snapshot() == [['a'], ['A'], []]

    bot.pipe('a').append('b')
    run('a', 'b')
    assert snapshot() == [['a', 'b'], ['A', 'B'], []]

    run('b', 'c')
    assert snapshot() == [['a', 'b'], ['A', 'B'], ['a', 'b']]

    bot.pipe('b').append('C')
    run('c')
    assert snapshot() == [['a', 'b'], ['A', 'B', 'C'], ['a', 'b', 'c']]

    # Unrestricted run: the expected key order changes in every pipe.
    run()
    assert snapshot() == [['b', 'a'], ['B', 'C', 'A'], ['b', 'c', 'a']]
Пример #19
0
def test_init_with_engine_instance():
    """``Bot`` can be constructed from an existing SQLAlchemy engine object."""
    engine = sa.create_engine('sqlite:///:memory:')
    Bot(engine)
Пример #20
0
def test_init():
    """A database URL passed to ``Bot`` is kept unchanged as the engine URL."""
    bot = Bot('sqlite:///:memory:')
    engine_url = str(bot.engine.url)
    assert engine_url == 'sqlite:///:memory:'
Пример #21
0
def test_autodefine():
    """Pipes stored by an earlier bot are only visible after ``autodefine()``.

    A new bot on the same engine raises ``KeyError`` for pipe ``'a'`` until
    ``autodefine()`` is called.
    """
    shared_engine = sa.create_engine('sqlite:///:memory:')

    bot = Bot(shared_engine)
    pipe = bot.define('a')
    pipe.append([1, 2, 3])

    bot = Bot(shared_engine)
    with pytest.raises(KeyError) as excinfo:
        bot.pipe('a')
    assert str(excinfo.value) == "'a'"

    bot = Bot(shared_engine).autodefine()
    stored_keys = list(bot.pipe('a').keys())
    assert stored_keys == [1, 2, 3]
Пример #22
0
#!/usr/bin/env python3

from databot import Bot, define, task, first

# Pipes used by this script.
_pipes = [
    define('index'),
    define('news'),
]

# Download the Reddit front page into 'index'; marked once().
_download_index = task('index').once().download('https://www.reddit.com/')

# For every '.thing.link' element select the link href as key and a dict of
# fields as value, from 'index' into 'news'.
_extract_news = task('index', 'news').select([
    '.thing.link',
    ('.entry .title > a@href', {
        'title': '.entry .title > a:text',
        'score': '.midcol .score.likes@title',
        'time': first(['.tagline time@datetime']),
        'comments': '.entry a.comments:text',
    })
])

# Export the 'news' pipe to a JSON-lines file, then compact.
_export_news = task('news').export('/tmp/reddit.jsonl')
_compact = task().compact()

pipeline = {
    'pipes': _pipes,
    'tasks': [
        _download_index,
        _extract_news,
        _export_news,
        _compact,
    ],
}

if __name__ == '__main__':
    bot = Bot('/tmp/reddit.db')
    bot.main(pipeline)
Пример #23
0
def show_info(i):
    """Call ``Bot.debug()``; the incoming pipe value ``i`` is ignored."""
    Bot.debug()
Пример #24
0
def test_init_no_args():
    """Without arguments ``Bot`` uses an in-memory SQLite engine URL."""
    bot = Bot()
    engine_url = str(bot.engine.url)
    assert engine_url == 'sqlite:///:memory:'