Пример #1
0
    def get_activity(self, page):
        """Extract activity entries from the legislative procedure table.

        Locates the element(s) containing the text "Derularea procedurii
        legislative", takes the last of their ancestor ``<table>`` elements
        and walks its direct children (treated as rows).  Rows are ignored
        until one whose first cell reads ``Data`` has been seen; that header
        row is skipped as well.

        For every following row:

        * a non-empty first cell is parsed as a ``dd.mm.YYYY`` date and
          starts a new ``Activity`` carrying the location known so far;
        * a last cell with a ``rowspan`` attribute is treated as a location
          cell: its text becomes the location for activities started later;
        * any other last cell has its spacer images removed and its PDF
          icons replaced by the text ``(pdf)``; its sanitized HTML is then
          appended to the current activity.

        :param page: parsed document to search; must support ``find`` as
            used below (presumably a PyQuery object -- confirm with caller).
        :returns: list of ``Activity`` objects in the order encountered.
        :raises IndexError: if no ancestor table is found for the headline.
        :raises ValueError: from ``datetime.strptime`` if a date cell is
            not in ``dd.mm.YYYY`` format.
        :raises AssertionError: if a location cell appears while the
            previous one's ``rowspan`` is still being counted down.
        """
        activity = []
        headline = page.find(':contains("Derularea procedurii legislative")')
        # Use the last of the ancestor tables that contain the headline.
        table = list(headline.parents('table').items())[-1]

        seen_data = False
        location = None
        location_countdown = 0
        ac = None
        for row in table.children().items():
            # Count down the rows still covered by the current location
            # cell's rowspan; the counter only feeds the sanity check below.
            if location_countdown > 0:
                location_countdown -= 1

            cols = row.children()

            date_text = cols.eq(0).text()
            if date_text == 'Data':
                # Header row: real data starts after it.
                seen_data = True
                continue
            elif not seen_data:
                continue

            if date_text:
                # A dated row closes the activity in progress and opens a
                # new one.
                if ac:
                    activity.append(ac)
                ac = Activity(
                    date=datetime.strptime(date_text, '%d.%m.%Y').date(),
                    location=location,
                    html="",
                )

            last_col = pq(cols[-1])
            rowspan = last_col.attr('rowspan')
            if rowspan:
                # Raised explicitly (rather than via ``assert``) so the
                # check is not stripped when running under ``python -O``.
                if location_countdown != 0:
                    raise AssertionError(
                        "location cell found before the previous rowspan "
                        "ended")
                location_countdown = int(rowspan)
                # NOTE(review): an activity opened on this same row was
                # already created with the previous location.
                location = last_col.text()

            else:
                last_col.find('img[src="/img/spacer.gif"]').remove()
                (last_col.find('img[src="/img/icon_pdf_small.gif"]')
                    .replaceWith('(pdf)'))
                html = last_col.html()
                if html:
                    # NOTE(review): if a content row precedes the first
                    # dated row, ``ac`` is still None and this raises
                    # AttributeError.
                    ac.html += sanitize(html) + '\n'

        if ac:
            activity.append(ac)

        return activity
Пример #2
0
    def get_activity(self, page):
        """Extract activity entries from the legislative procedure table.

        Locates the element(s) containing the text "Derularea procedurii
        legislative", takes the last of their ancestor ``<table>`` elements
        and walks its direct children (treated as rows).  Rows are ignored
        until one whose first cell reads ``Data`` has been seen; that header
        row is skipped as well.

        For every following row:

        * a non-empty first cell is parsed as a ``dd.mm.YYYY`` date and
          starts a new ``Activity`` carrying the location known so far;
        * a last cell with a ``rowspan`` attribute is treated as a location
          cell: its text becomes the location for activities started later;
        * any other last cell has its spacer images removed and its PDF
          icons replaced by the text ``(pdf)``; its sanitized HTML is then
          appended to the current activity.

        :param page: parsed document to search; must support ``find`` as
            used below (presumably a PyQuery object -- confirm with caller).
        :returns: list of ``Activity`` objects in the order encountered.
        :raises IndexError: if no ancestor table is found for the headline.
        :raises ValueError: from ``datetime.strptime`` if a date cell is
            not in ``dd.mm.YYYY`` format.
        :raises AssertionError: if a location cell appears while the
            previous one's ``rowspan`` is still being counted down.
        """
        activity = []
        headline = page.find(':contains("Derularea procedurii legislative")')
        # Use the last of the ancestor tables that contain the headline.
        table = list(headline.parents('table').items())[-1]

        seen_data = False
        location = None
        location_countdown = 0
        ac = None
        for row in table.children().items():
            # Count down the rows still covered by the current location
            # cell's rowspan; the counter only feeds the sanity check below.
            if location_countdown > 0:
                location_countdown -= 1

            cols = row.children()

            date_text = cols.eq(0).text()
            if date_text == 'Data':
                # Header row: real data starts after it.
                seen_data = True
                continue
            elif not seen_data:
                continue

            if date_text:
                # A dated row closes the activity in progress and opens a
                # new one.
                if ac:
                    activity.append(ac)
                ac = Activity(
                    date=datetime.strptime(date_text, '%d.%m.%Y').date(),
                    location=location,
                    html="",
                )

            last_col = pq(cols[-1])
            rowspan = last_col.attr('rowspan')
            if rowspan:
                # Raised explicitly (rather than via ``assert``) so the
                # check is not stripped when running under ``python -O``.
                if location_countdown != 0:
                    raise AssertionError(
                        "location cell found before the previous rowspan "
                        "ended")
                location_countdown = int(rowspan)
                # NOTE(review): an activity opened on this same row was
                # already created with the previous location.
                location = last_col.text()

            else:
                last_col.find('img[src="/img/spacer.gif"]').remove()
                (last_col.find(
                    'img[src="/img/icon_pdf_small.gif"]').replaceWith('(pdf)'))
                html = last_col.html()
                if html:
                    # NOTE(review): if a content row precedes the first
                    # dated row, ``ac`` is still None and this raises
                    # AttributeError.
                    ac.html += sanitize(html) + '\n'

        if ac:
            activity.append(ac)

        return activity
Пример #3
0
def test_sanitize(in_html, out_html):
    """Check that ``sanitize`` turns ``in_html`` into ``out_html``.

    Both arguments are supplied from outside this function (presumably
    by test parametrization or fixtures -- not visible here).
    """
    from mptracker.scraper.common import sanitize

    cleaned = sanitize(in_html)
    assert cleaned == out_html
Пример #4
0
def test_sanitize(in_html, out_html):
    """Verify that sanitizing ``in_html`` yields exactly ``out_html``.

    The input/expected pair comes from the caller (presumably test
    parametrization or fixtures defined outside this view).
    """
    from mptracker.scraper.common import sanitize

    actual = sanitize(in_html)
    expected = out_html
    assert actual == expected