Exemplo n.º 1
0
def main():
    """Configure and start the 'places' bot through ``botlib.runbot``.

    The configuration passed to ``botlib.runbot`` declares a single
    'places' pipe and two tasks on it:

    * one built with ``.daily().clean().append(query_places(), ...)``,
      reporting progress under the name 'places';
    * one exporting the columns listed below to ``data/osm/places.csv``.
    """
    # Columns passed as ``include=`` to the CSV export, in this order.
    export_columns = [
        'osm_id',
        'type',
        'place',
        'population',
        'wikipedia_title',
        'wikipedia_lang',
        'lon',
        'lat',
        'admin_level_6_osm_id',
        'admin_level_6',
        'admin_level_5_osm_id',
        'admin_level_5',
        'admin_level_4_osm_id',
        'admin_level_4',
    ]

    botlib.runbot({
        'pipes': [define('places')],
        'tasks': [
            task('places').daily().clean().append(
                query_places(),
                progress='places',
            ),
            task('places').export(
                'data/osm/places.csv',
                include=export_columns,
            ),
        ],
    })
Exemplo n.º 2
0
                    'question-url': row.key,
                    'date': 'xpath:./ancestor::table//tr[contains(th/text(), "Posėdžio data")]/td/text()',
                    'status': 'xpath:./ancestor::table//tr[contains(th/text(), "Būsena")]/td/text()',
                })
            ])

    with bot.pipe('attachment preview links'):
        with bot.pipe('attachment preview').download(update={'source': row.value}):
            key = call(clean_redirect_url, 'xpath:/html/head/meta[@http-equiv="refresh"]/@content?')
            with bot.pipe('attachment preview links').select([(key, row.value['source'])]):
                bot.pipe('attachment preview').download(update={'source': row.value})

    with bot.pipe('attachment links'):
        bot.pipe('attachments').download(update={
            'question-url': row.value['question-url'],
            'date': row.value['date'],
            'status': row.value['status'],
        })

    bot.pipe('attachment preview').export('data/vilnius/vtaryba/attachment-previews.csv', include=['key', 'size', 'source'], update={
        'size': row.value['content'].length,
    })

    bot.pipe('attachments').export('data/vilnius/vtaryba/attachments.csv', update=attachment_export)

    bot.compact()


# Script entry point: only when this file is executed directly (not imported),
# pass the `define` and `run` callables to botlib.runbot.
# NOTE(review): `define` and `run` are expected to be defined earlier in this
# module — confirm, they are not visible in this excerpt.
if __name__ == '__main__':
    botlib.runbot(define, run)
Exemplo n.º 3
0
def define(bot):
    """Call ``bot.define()`` once per pipe name used by this script.

    The names are defined in a fixed order; nothing is returned.
    """
    pipe_names = (
        'street suggestions',
        'streets',
        'pages',
        'extract search results',
        'osm addresses',
        'extract osm addresses',
    )
    for pipe_name in pipe_names:
        bot.define(pipe_name)


def run(bot):
    """Fill and chain the pipes that end in the ``data/gyvunai.csv`` export.

    Steps, in order:

    1. Call ``bot.compact()``.
    2. If the 'street suggestions' pipe reports a count of zero, POST one
       request per lowercase ASCII letter to the ajax endpoint and append
       ``(letter, response text)`` to that pipe.
    3. With 'street suggestions' as the active pipe, chain
       'streets' (``extract_streets`` followed by ``.dedup()``),
       'pages' (``download_page``) and
       'extract search results' (``extract_search_results``).
    4. Export 'extract search results' to ``data/gyvunai.csv``.
    """
    bot.compact()

    endpoint = 'http://www.manogyvunai.lt/m/m_animalproblems/files/ajax_workaround.php'

    suggestions_empty = bot.pipe('street suggestions').data.count() == 0
    if suggestions_empty:
        # A str iterates character by character, so no list() copy is needed.
        for char in string.ascii_lowercase:
            params = urlencode({'getStreetsByLetters': '1', 'letters': char})
            response = requests.post(endpoint + '?' + params)
            bot.pipe('street suggestions').append(char, response.text)

    # Each pipe is looked up inside the enclosing `with`, exactly as the
    # nesting dictates, so the lookup order matches the nesting order.
    with bot.pipe('street suggestions'):
        streets = bot.pipe('streets').call(extract_streets).dedup()
        with streets:
            pages = bot.pipe('pages').call(download_page)
            with pages:
                bot.pipe('extract search results').call(extract_search_results)

    bot.pipe('extract search results').export('data/gyvunai.csv')


# Script entry point: only when this file is executed directly (not imported),
# pass the `define` and `run` callables to botlib.runbot.
if __name__ == '__main__':
    botlib.runbot(define, run)
Exemplo n.º 4
0
        # Vardai
        task('vardai-puslapiai', 'vardai').select(
            this.key.urlparse().path, {
                'lytis':
                select('#page-left xpath:.//h1[1]/@class'),
                'vardas':
                select('#page-left xpath:.//h1[1]/strong/text()'),
                'kilmė':
                select(
                    '#name-info xpath:./p[strong/text() = "Vardo kilmė:"]/text()?'
                ).null().strip(),
                'vardadienis':
                select(
                    '#name-info xpath:./p[strong/text() = "Vardadienis:"]/text()?'
                ).null().replace('\xa0', ' ').strip(),
                'reikšmė':
                select(
                    '#name-info xpath:./p[strong/text() = "Vardo reikšmė:"]?').
                null().text(exclude=['xpath:./strong[1]']),
                'panašūs vardai': [
                    '#name-info xpath:./p[strong/text() = "Panašūs ir giminingi vardai:"]/a/text()'
                ],
                'populiarumas':
                this.value.content.apply(populiarumas),
            }),
    ],
}

# Script entry point: only when this file is executed directly (not imported),
# pass `pipeline` to botlib.runbot.
# NOTE(review): `pipeline` is presumably the dict literal that closes just
# above — confirm, its opening is not visible in this excerpt.
if __name__ == '__main__':
    botlib.runbot(pipeline)