示例#1
0
    def test_empty_scalars_return_nullnode(self):
        """Absent siblings and an absent parent are wrapped as NullNode."""
        anchor = Soupy('<a></a>').find('a')
        for attr in ('next_sibling', 'previous_sibling'):
            assert isinstance(getattr(anchor, attr), NullNode)

        root = Soupy('<a></a>')
        assert isinstance(root.parent, NullNode)
示例#2
0
    def test_failed_search(self):
        """Dumping the text of a tag one element lacks raises NullValueError."""
        doc = Soupy('<a><b>1</b></a><a>2</a>')

        with pytest.raises(NullValueError):
            anchors = doc.find_all('a')
            anchors.dump(a=Q.find('b').text)
示例#3
0
    def test_orelse(self):
        """orelse() supplies the default where the query finds nothing."""
        doc = Soupy('<a><b>1</b></a><a>2</a>')

        anchors = doc.find_all('a')
        b_as_int = Q.find('b').text.map(int).orelse(0)
        dumped = anchors.dump(a=b_as_int)

        assert dumped.val() == [{'a': 1}, {'a': 0}]
示例#4
0
    def test_orelse(self):
        """The second anchor has no ``b`` tag, so its value falls back to 0."""
        soup = Soupy('<a><b>1</b></a><a>2</a>')
        expected = [{'a': 1}, {'a': 0}]

        rows = soup.find_all('a').dump(
            a=Q.find('b').text.map(int).orelse(0),
        )

        assert rows.val() == expected
示例#5
0
    def test_multi_dump(self):
        """dump() with two named queries yields one two-key dict per element."""
        doc = Soupy('<a val="1">1</a><a>2</a><a val="3">3</a>')
        expected = [
            {'a': '1', 'b': '1'},
            {'a': '2', 'b': None},
            {'a': '3', 'b': '3'},
        ]

        anchors = doc.find_all('a')
        rows = anchors.dump(a=Q.text, b=Q.attrs.get('val'))
        result = rows.val()
        assert result == expected
示例#6
0
    def test_navstring_dump(self):
        """each() works over contents that mix tags with a bare text node."""
        doc = Soupy('<div><a>1</a>2<a>3</a></div>')

        texts = doc.find('div').contents.each(Q.text)
        assert texts.val() == ['1', '2', '3']

        # The bare text node has no first child, so it takes the fallback.
        first_child_text = Q.contents[0].text.orelse('!')
        firsts = doc.find('div').contents.each(first_child_text)
        assert firsts.val() == ['1', '!', '3']
示例#7
0
    def test_navstring_dump(self):
        """Text of each child, then text of each child's first child."""
        soup = Soupy('<div><a>1</a>2<a>3</a></div>')

        plain = soup.find('div').contents.each(Q.text).val()
        assert plain == ['1', '2', '3']

        # Children without a first child of their own report '!' instead.
        children = soup.find('div').contents
        nested = children.each(Q.contents[0].text.orelse('!')).val()
        assert nested == ['1', '!', '3']
示例#8
0
    def test_simple_dump(self):
        """dump() gives dicts for keyword queries and tuples for positional ones."""
        doc = Soupy('<a>1</a><a>2</a><a>3</a>')

        by_name = doc.find_all('a').dump(a=Q.text)
        assert by_name.val() == [{'a': '1'}, {'a': '2'}, {'a': '3'}]

        by_position = doc.find_all('a').dump(Q.text)
        assert by_position.val() == [('1',), ('2',), ('3',)]

        # Mixing positional and keyword queries is rejected.
        with pytest.raises(ValueError):
            first_anchor = doc.find('a')
            first_anchor.dump(Q.text, a=Q.text)
示例#9
0
 def test_find_multi_methods(self, method):
     """The wrapper's finder named ``method`` agrees with the raw DOM's."""
     markup = """
         <div>
            <div></div>
            <b><div></div></b>
            <div></div>
        </div>
        """
     bold = Soupy(markup).find('b')
     raw_bold = bold.val()
     raw_matches = getattr(raw_bold, method)('div')
     # The raw result must be truthy before it is used as the reference.
     assert raw_matches
     wrapped_matches = getattr(bold, method)('div')
     assert wrapped_matches.val() == raw_matches
示例#10
0
 def test_find_multi_methods(self, method):
     """Call ``method`` on wrapper and raw element; results must match."""
     html = """
         <div>
            <div></div>
            <b><div></div></b>
            <div></div>
        </div>
        """
     wrapped = Soupy(html).find('b')
     element = wrapped.val()
     reference = getattr(element, method)('div')
     # A falsy reference result fails the test outright.
     assert reference
     found = getattr(wrapped, method)('div').val()
     assert found == reference
示例#11
0
    def test_multi_dump(self):
        """Text and ``val`` attribute are dumped together; a missing attribute is None."""
        soup = Soupy('<a val="1">1</a><a>2</a><a val="3">3</a>')

        dumped = soup.find_all('a').dump(a=Q.text, b=Q.attrs.get('val'))
        first, second, third = (
            {'a': '1', 'b': '1'},
            {'a': '2', 'b': None},
            {'a': '3', 'b': '3'},
        )
        assert dumped.val() == [first, second, third]
示例#12
0
 def get_chart(self, chart_name):
     """Fetch the named chart and return its entries as a JSON string.

     The lower-cased ``chart_name`` is looked up in
     ``self.chart_titles_dict``; the value found is appended to
     ``self.base_url`` to build the page URL. Every ``tr`` with class
     ``latc_song`` is scanned and one
     ``(('position', n), ('title', t), ('artist', a))`` tuple is appended
     to ``self.chart_list`` per entry that could be read.

     Side effects: sets ``self.chart_list``, ``self.chart_name`` and
     ``self.url``, and performs an HTTP GET.

     Raises:
         ValueError: if the chart name is not in ``self.chart_titles_dict``.
     """
     self.chart_list = []
     self.chart_name = chart_name.lower()
     try:
         number = self.chart_titles_dict[self.chart_name]
     except KeyError:
         raise ValueError('That chart does not exist')
     self.url = self.base_url + str(number)
     raw = requests.get(self.url)
     soup = Soupy(raw.text)
     tr_container = soup.find_all('tr', {'class': 'latc_song'})
     pos = 0
     song_title_constant = 2
     song_artist_constant = 3
     for table_row in tr_container:
         children = table_row.children
         # Type of the link search on the row's first child; children whose
         # own search result has this same type are skipped below.
         null_container_holder = type(
             children[0].find('table').find_all('a'))
         for child in children:
             links = child.find('table').find_all('a')
             if type(links) is null_container_holder:
                 continue
             try:
                 # The position advances even when the entry is unreadable.
                 pos = pos + 1
                 song_title = links[
                     song_title_constant].contents.first().val().string
                 song_artist = links[
                     song_artist_constant].contents.first().val().string
                 self.chart_list.append(
                     (('position', pos), ('title', song_title),
                      ('artist', song_artist)))
             except NullValueError:
                 # The original ``except NullValueError, NameError:`` is the
                 # Python 2 binding form: it caught only NullValueError and
                 # overwrote the global name ``NameError``. Catching just
                 # NullValueError keeps what it caught without the clobbering.
                 print('\n')
             except NameError:
                 # Fallback kept from the original: retry one link earlier.
                 song_title = links[song_title_constant -
                                    1].contents.first().val().string
                 song_artist = links[song_artist_constant -
                                     1].contents.first().val().string
                 self.chart_list.append(
                     (('position', pos), ('title', song_title),
                      ('artist', song_artist)))
     return json.dumps(self.chart_list, indent=3)
示例#13
0
    def test_repr_unicode(self):
        """print, repr() and text_type() all accept a non-ASCII document."""
        doc = Soupy('<html>∂ƒ</html>')
        print(doc)
        print(repr(doc))
        if not PY3:
            # On Python 2 the repr has to survive an ASCII encode.
            ascii_repr = repr(doc).encode('ascii')
            assert ascii_repr
        print(text_type(doc))
示例#14
0
def check_podcast(type, url):
    """Announce a new podcast episode if the page at ``url`` shows one.

    The text of the first ``h2`` on the page is taken as the latest episode
    name and compared with ``videos[type]``. When it differs, the episode is
    announced with ``bot.say`` on ``CHANNEL``, logged, and stored back into
    ``videos[type]``.

    Args:
        type: Key into ``videos`` and ``PODCAST_NAMES``. It shadows the
            builtin; the name is kept so keyword callers keep working.
        url: Address of the page to fetch.

    Returns:
        True if a new episode was announced. False if the name is unchanged
        or reading the page content failed.
    """
    page = Soupy(urllib.urlopen(url))
    try:
        latestname = page.find("h2").text.val()
        if latestname == videos[type]:
            return False
        latestdesc = page.find(class_="deck").text.val().strip()
        bot.say(CHANNEL, "[New %s] %s - %s %s" % (PODCAST_NAMES[type], latestname, latestdesc, url))
        log.info("New %s: %s" % (PODCAST_NAMES[type], latestname))
        videos[type] = latestname
        return True
    except Exception:
        # Was a bare ``except:``, which also swallowed SystemExit and
        # KeyboardInterrupt; ordinary failures remain best-effort.
        log.error("Failed checking for latest %s at %s" % (type, url))
        return False
示例#15
0
    def __init__(self, url, generator = correct_content_generator):
        """Download ``url`` and hand every generated tag to ``retrieve_file``.

        Args:
            url: Address of the page to download.
            generator: Callable that takes the parsed page and yields tags.
                It is replaced by ``only_p_generator`` when
                ``find_start_tag`` raises NameError for the page.
        """
        self.url = url
        self.tags = []
        self.img_url = []

        soup = Soupy(download(url))

        # When <title> is missing, the lookup yields a null wrapper whose
        # val() raises, so the 'Lorem Ipsum' default was never reached.
        # orelse('') turns the missing case into an empty string first.
        self.title = soup.find('title').text.orelse('').val() or 'Lorem Ipsum'
        self.safe_title = safe_chars(self.title)
        try:
            find_start_tag(soup)
        except NameError:
            generator = only_p_generator

        for tag in generator(soup):
            self.retrieve_file(tag)
示例#16
0
def check_podcast(type, url):
    """Check the page at ``url`` for a new episode and announce it.

    The first ``h2`` element's text is the candidate episode name. If it
    differs from ``videos[type]``, the text of the element with class
    ``deck`` is used as the description, the episode is announced through
    ``bot.say`` on ``CHANNEL`` and logged, and ``videos[type]`` is updated.

    Args:
        type: Key into ``videos`` and ``PODCAST_NAMES``. It shadows the
            builtin; the name is kept for interface compatibility.
        url: Address of the page to fetch.

    Returns:
        True when a new episode was announced; False when nothing changed
        or the page content could not be processed.
    """
    page = Soupy(urllib.urlopen(url))
    try:
        namenode = page.find("h2")
        latestname = namenode.text.val()
        if latestname == videos[type]:
            return False
        latestdesc = page.find(class_="deck").text.val().strip()
        bot.say(
            CHANNEL, "[New %s] %s - %s %s" %
            (PODCAST_NAMES[type], latestname, latestdesc, url))
        log.info("New %s: %s" % (PODCAST_NAMES[type], latestname))
        videos[type] = latestname
        return True
    except Exception:
        # Narrowed from a bare ``except:`` so that SystemExit and
        # KeyboardInterrupt are no longer swallowed.
        log.error("Failed checking for latest %s at %s" % (type, url))
        return False
示例#17
0
 def get_chart(self, chart_name):
     """Download the named chart and return its songs as a JSON string.

     ``chart_name`` is lower-cased and looked up in
     ``self.chart_titles_dict``; the value found is appended to
     ``self.base_url`` to form the URL. Each ``tr`` with class
     ``latc_song`` is scanned, and every entry that can be read adds a
     ``(('position', n), ('title', t), ('artist', a))`` tuple to
     ``self.chart_list``.

     Side effects: sets ``self.chart_list``, ``self.chart_name`` and
     ``self.url``, and performs an HTTP GET.

     Raises:
         ValueError: if the chart name is not in ``self.chart_titles_dict``.
     """
     self.chart_list = []
     self.chart_name = chart_name.lower()
     try:
         number = self.chart_titles_dict[self.chart_name]
     except KeyError:
         raise ValueError('That chart does not exist')
     self.url = self.base_url + str(number)
     raw = requests.get(self.url)
     soup = Soupy(raw.text)
     tr_container = soup.find_all('tr',{'class':'latc_song'})
     pos = 0
     song_title_constant = 2
     song_artist_constant = 3
     for table_row in tr_container:
         children = table_row.children
         # Reference type: the link search result of the row's first child.
         # Children whose own result has this same type are skipped.
         null_container_holder = type(children[0].find('table').find_all('a'))
         for child in children:
             links = child.find('table').find_all('a')
             if type(links) is null_container_holder:
                 continue
             try:
                 # Position is advanced before the entry is known readable.
                 pos = pos + 1
                 song_title = links[song_title_constant].contents.first().val().string
                 song_artist = links[song_artist_constant].contents.first().val().string
                 self.chart_list.append((('position',pos),('title',song_title), ('artist',song_artist)))
             except NullValueError:
                 # ``except NullValueError, NameError:`` was the Python 2
                 # binding form: only NullValueError was caught and the global
                 # name ``NameError`` was overwritten with the instance. It is
                 # also a SyntaxError on Python 3.
                 print ('\n')
             except NameError:
                 # Original fallback: read the entry from one link earlier.
                 song_title = links[song_title_constant-1].contents.first().val().string
                 song_artist = links[song_artist_constant-1].contents.first().val().string
                 self.chart_list.append((('position',pos),('title',song_title), ('artist',song_artist)))
     return json.dumps(self.chart_list, indent = 3)
示例#18
0
    def test_simple_dump(self):
        """Keyword dump, positional dump, and the error raised when both are mixed."""
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')
        digits = ('1', '2', '3')

        named = soup.find_all('a').dump(a=Q.text).val()
        assert named == [{'a': d} for d in digits]

        positional = soup.find_all('a').dump(Q.text).val()
        assert positional == [(d, ) for d in digits]

        with pytest.raises(ValueError):
            anchor = soup.find('a')
            anchor.dump(Q.text, a=Q.text)
示例#19
0
    def test_dump_with_map(self):
        """map() inside a dump query converts each extracted text."""
        doc = Soupy('<a>1</a><a>2</a><a>3</a>')

        anchors = doc.find_all('a')
        dumped = anchors.dump(a=Q.text.map(int))
        assert dumped.val() == [{'a': 1}, {'a': 2}, {'a': 3}]
示例#20
0
    def test_dump_with_getitem(self):
        """A dump query may index into ``attrs`` with square brackets."""
        doc = Soupy('<a val="1">1</a>')

        anchors = doc.find_all('a')
        dumped = anchors.dump(a=Q.attrs["val"])
        assert dumped.val() == [{'a': "1"}]
示例#21
0
    def test_simple_dump(self):
        """A single keyword query dumps one single-key dict per element."""
        doc = Soupy('<a>1</a><a>2</a><a>3</a>')

        anchors = doc.find_all('a')
        dumped = anchors.dump(a=Q.text)
        assert dumped.val() == [{'a': '1'}, {'a': '2'}, {'a': '3'}]
示例#22
0
 def setup_method(self, method):
     """Store a freshly parsed three-anchor document in ``self.node``."""
     markup = '<html><body><a>1</a><a>2</a><a>3</a></body></html>'
     self.node = Soupy(markup)
示例#23
0
    def test_prettify(self):
        """prettify() on the wrapper equals prettify() on the wrapped value."""
        doc = Soupy('<html>∂ƒ</html>')

        wrapped_output = doc.prettify()
        raw_output = doc.val().prettify()
        assert wrapped_output == raw_output
示例#24
0
 def test_call(self):
     """Calling the wrapper gives the same result as calling the raw value."""
     doc = Soupy('<a class="test">test</a>')
     via_wrapper = doc('a').val()
     via_raw = doc.val()('a')
     assert via_wrapper == via_raw
示例#25
0
 def test_iter(self):
     """Iterating the wrapper yields wrappers around the raw value's items."""
     doc = Soupy('<a class="test">test</a>')
     pairs = zip(doc, doc.val())
     for wrapped, raw in pairs:
         assert wrapped.val() == raw
示例#26
0
    def test_dump_with_map(self):
        """Anchor texts are dumped as integers when mapped through ``int``."""
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')
        expected = [{'a': n} for n in (1, 2, 3)]

        as_int = Q.text.map(int)
        result = soup.find_all('a').dump(a=as_int).val()
        assert result == expected
示例#27
0
    def test_dump_with_getitem(self):
        """The ``val`` attribute is read through item access on ``Q.attrs``."""
        soup = Soupy('<a val="1">1</a>')
        expected = [{'a': "1"}]

        val_attr = Q.attrs["val"]
        result = soup.find_all('a').dump(a=val_attr).val()
        assert result == expected
示例#28
0
    def test_dump_with_method(self):
        """A find() that matches nothing dumps its orelse() default."""
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')
        expected = [{'a': ''}] * 3

        b_or_blank = Q.find('b').orelse('')
        result = soup.find_all('a').dump(a=b_or_blank).val()
        assert result == expected
示例#29
0
    def test_simple_dump(self):
        """Each anchor's text is dumped under the key ``a``."""
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')
        expected = [{'a': text} for text in ('1', '2', '3')]

        result = soup.find_all('a').dump(a=Q.text).val()
        assert result == expected
示例#30
0
 def test_scalar_properties(self, attr):
     """Property ``attr`` on the wrapper unwraps to the raw element's value."""
     wrapped = Soupy('<a class="foo"><b><c>test</c></b></a>').find('c')
     element = wrapped.val()
     from_wrapper = getattr(wrapped, attr).val()
     from_element = getattr(element, attr)
     assert from_wrapper == from_element
示例#31
0
 def test_collection_properties(self, attr):
     """Collection property ``attr`` lists the same items as the raw element's."""
     wrapped = Soupy('<a class="foo"><b><c>test</c></b></a>').find('b')
     element = wrapped.val()
     from_wrapper = list(getattr(wrapped, attr).val())
     from_element = list(getattr(element, attr))
     assert from_wrapper == from_element
示例#32
0
def return_all_links(url):
    """Return the ``a`` tags of the page at ``url`` accepted by ``is_tag_not_anchor``."""
    soup = Soupy(download(url))
    links = []
    for tag in soup.find_all('a'):
        if is_tag_not_anchor(tag):
            links.append(tag)
    return links
示例#33
0
    def test_repr_unicode(self):
        """A collection holding non-ASCII markup can be printed in every form."""
        members = [Soupy('<html>∂ƒ</html>')]
        coll = Collection(members)
        print(coll)
        print(repr(coll))
        print(text_type(coll))
示例#34
0
    def test_dump_with_multi_map(self):
        """Chained map() calls are applied in order within a dump query."""
        doc = Soupy('<a>1</a><a>2</a><a>3</a>')

        def double(value):
            return value * 2

        doubled_int = Q.text.map(int).map(double)
        dumped = doc.find_all('a').dump(a=doubled_int)
        assert dumped.val() == [{'a': 2}, {'a': 4}, {'a': 6}]
示例#35
0
 def test_iter(self):
     """Each item from iterating the wrapper unwraps to the matching raw item."""
     soup = Soupy('<a class="test">test</a>')
     raw_items = soup.val()
     for item, raw in zip(soup, raw_items):
         assert item.val() == raw
示例#36
0
class TestCollection(object):
    def setup_method(self, method):
        self.node = Soupy('<html><body><a>1</a><a>2</a><a>3</a></body></html>')

    def test_slice(self):
        node = self.node
        dom = node.val()

        assert isinstance(node.children[::2], Collection)
        assert node.children[::2].val() == list(dom.children)[::2]

    def test_slice_on_iterator(self):
        c = Collection((Scalar(i) for i in range(5)))
        assert c[::2].val() == [0, 2, 4]

    def test_get_single(self):
        node = self.node.find('body')
        dom = node.val()
        assert node.children[1].val() == dom.contents[1]

    def test_get_single_on_iterator(self):
        c = Collection((Scalar(i) for i in range(5)))
        assert c[2].val() == 2

    def test_map(self):
        node = self.node
        assert node.find_all('a').map(len).val() == 3

    def test_first(self):
        node = self.node
        assert node.find_all('a').first().text.val() == '1'

    def test_first_empty(self):
        node = self.node
        assert isinstance(node.find_all('x').first(), NullNode)

    def test_each(self):
        node = self.node
        result = node.find_all('a').each(Q.text.map(int)).val()
        assert result == [1, 2, 3]

    def test_multi_each(self):
        node = self.node
        result = node.find_all('a').each(Q.text.map(int), Q.text).val()
        assert result == [(1, '1'), (2, '2'), (3, '3')]

    def test_filter(self):
        node = self.node
        result = node.find_all('a').filter(Q.text.map(int) > 1).val()
        assert len(result) == 2

    def test_filter(self):
        node = self.node
        result = node.find_all('a').exclude(Q.text.map(int) > 1).val()
        assert len(result) == 1

    def test_filter_noarg(self):
        node = self.node
        result = node.find_all('a').each(Q.text.map(int) > 1).filter().val()
        assert len(result) == 2

    def test_takewhile(self):
        node = self.node
        result = node.find_all('a').takewhile(Q.text.map(int) < 2).val()
        assert len(result) == 1

    def test_dropwhile(self):
        node = self.node

        result = node.find_all('a').dropwhile(Q.text.map(int) < 2).val()
        assert len(result) == 2

    def test_index_oob(self):
        assert isinstance(Collection([])[5], NullNode)

    def test_bool(self):

        assert Collection([Scalar(1)])
        assert not Collection([])

    def test_count(self):

        assert Collection([]).count().val() == 0
        assert Collection([Scalar(1)]).count().val() == 1
        assert NullCollection().count().val() == 0

    def test_repr_unicode(self):

        s = Collection([Soupy('<html>∂ƒ</html>')])
        print(s)
        print(repr(s))
        print(text_type(s))

    def test_zip(self):

        c1 = Collection(map(Scalar, [1, 2, 3]))
        c2 = c1.each(Q * 2)
        c3 = c1.zip(c2)

        assert c3.val() == [(1, 2), (2, 4), (3, 6)]

        with pytest.raises(ValueError):
            c1.zip([1, 2])

    def test_dictzip(self):

        c = Collection([Scalar(1), Scalar(2)])
        result = c.dictzip(['a', 'b'])
        expected = {'a': 1, 'b': 2}

        assert result.val() == expected

        lbls = Collection([Scalar('a'), Scalar('b')])
        assert c.dictzip(lbls).val() == expected

    def test_list(self):

        items = list(map(Scalar, [1, 2]))
        assert list(Collection(items)) == items

    def test_typecheck(self):
        """ Collections must contain wrappers """
        with pytest.raises(TypeError):
            Collection([1])

    def test_all(self):

        c = Collection(map(Scalar, [True, False]))
        assert c.any().val()
        assert not c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [True, True]))
        assert c.any().val()
        assert c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [False, False]))
        assert not c.any().val()
        assert not c.all().val()
        assert c.none().val()

        c = Collection([])
        assert not c.any().val()
        assert c.none().val()
        assert c.all().val()  # this is python's behavior for empty lists
示例#37
0
 def test_nonnull_returns_self(self):
     """nonnull() on a wrapper compares equal to the wrapper itself."""
     doc = Soupy('')
     outcome = doc.nonnull()
     assert outcome == doc
示例#38
0
    def test_failed_search(self):
        """Without a fallback, a query that misses on one element raises."""
        soup = Soupy('<a><b>1</b></a><a>2</a>')

        with pytest.raises(NullValueError):
            anchors = soup.find_all('a')
            b_text = Q.find('b').text
            anchors.dump(a=b_text)
示例#39
0
class TestCollection(object):

    def setup_method(self, method):
        self.node = Soupy('<html><body><a>1</a><a>2</a><a>3</a></body></html>')

    def test_slice(self):
        node = self.node
        dom = node.val()

        assert isinstance(node.children[::2], Collection)
        assert node.children[::2].val() == list(dom.children)[::2]

    def test_slice_on_iterator(self):
        c = Collection((Scalar(i) for i in range(5)))
        assert c[::2].val() == [0, 2, 4]

    def test_get_single(self):
        node = self.node.find('body')
        dom = node.val()
        assert node.children[1].val() == dom.contents[1]

    def test_get_single_on_iterator(self):
        c = Collection((Scalar(i) for i in range(5)))
        assert c[2].val() == 2

    def test_map(self):
        node = self.node
        assert node.find_all('a').map(len).val() == 3

    def test_first(self):
        node = self.node
        assert node.find_all('a').first().text.val() == '1'

    def test_first_empty(self):
        node = self.node
        assert isinstance(node.find_all('x').first(), NullNode)

    def test_each(self):
        node = self.node
        result = node.find_all('a').each(Q.text.map(int)).val()
        assert result == [1, 2, 3]

    def test_multi_each(self):
        node = self.node
        result = node.find_all('a').each(Q.text.map(int),
                                         Q.text).val()
        assert result == [(1, '1'), (2, '2'), (3, '3')]

    def test_filter(self):
        node = self.node
        result = node.find_all('a').filter(Q.text.map(int) > 1).val()
        assert len(result) == 2

    def test_filter(self):
        node = self.node
        result = node.find_all('a').exclude(Q.text.map(int) > 1).val()
        assert len(result) == 1

    def test_filter_noarg(self):
        node = self.node
        result = node.find_all('a').each(Q.text.map(int) > 1).filter().val()
        assert len(result) == 2

    def test_takewhile(self):
        node = self.node
        result = node.find_all('a').takewhile(Q.text.map(int) < 2).val()
        assert len(result) == 1

    def test_dropwhile(self):
        node = self.node

        result = node.find_all('a').dropwhile(Q.text.map(int) < 2).val()
        assert len(result) == 2

    def test_index_oob(self):
        assert isinstance(Collection([])[5], NullNode)

    def test_bool(self):

        assert Collection([Scalar(1)])
        assert not Collection([])

    def test_count(self):

        assert Collection([]).count().val() == 0
        assert Collection([Scalar(1)]).count().val() == 1
        assert NullCollection().count().val() == 0

    def test_repr_unicode(self):

        s = Collection([Soupy('<html>∂ƒ</html>')])
        print(s)
        print(repr(s))
        print(text_type(s))

    def test_zip(self):

        c1 = Collection(map(Scalar, [1, 2, 3]))
        c2 = c1.each(Q * 2)
        c3 = c1.zip(c2)

        assert c3.val() == [(1, 2), (2, 4), (3, 6)]

        with pytest.raises(ValueError):
            c1.zip([1, 2])

    def test_dictzip(self):

        c = Collection([Scalar(1), Scalar(2)])
        result = c.dictzip(['a', 'b'])
        expected = {'a': 1, 'b': 2}

        assert result.val() == expected

        lbls = Collection([Scalar('a'), Scalar('b')])
        assert c.dictzip(lbls).val() == expected

    def test_list(self):

        items = list(map(Scalar, [1, 2]))
        assert list(Collection(items)) == items

    def test_typecheck(self):
        """ Collections must contain wrappers """
        with pytest.raises(TypeError):
            Collection([1])

    def test_all(self):

        c = Collection(map(Scalar, [True, False]))
        assert c.any().val()
        assert not c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [True, True]))
        assert c.any().val()
        assert c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [False, False]))
        assert not c.any().val()
        assert not c.all().val()
        assert c.none().val()

        c = Collection([])
        assert not c.any().val()
        assert c.none().val()
        assert c.all().val()  # this is python's behavior for empty lists
示例#40
0
    def test_prettify(self):
        """The wrapper's prettify() output matches the wrapped value's."""
        soup = Soupy('<html>∂ƒ</html>')

        pretty = soup.prettify()
        assert pretty == soup.val().prettify()
示例#41
0
 def test_collection_properties(self, attr):
     """Listing ``attr`` from the wrapper equals listing it from the raw element."""
     b_node = Soupy('<a class="foo"><b><c>test</c></b></a>').find('b')
     b_raw = b_node.val()
     wrapped_items = getattr(b_node, attr).val()
     assert list(wrapped_items) == list(getattr(b_raw, attr))
示例#42
0
    def test_either_fallback(self):
        """either() gives a Null when neither alternative finds anything."""
        doc = Soupy('<a><b>1</b></a><a>2</a>')

        both_missing = either(Q.find('d').text, Q.find('d').text)
        outcome = doc.apply(both_missing)
        assert isinstance(outcome, Null)
示例#43
0
    def test_dump_with_method(self):
        """Queries may call methods; a find() that misses takes the orelse() value."""
        doc = Soupy('<a>1</a><a>2</a><a>3</a>')

        anchors = doc.find_all('a')
        dumped = anchors.dump(a=Q.find('b').orelse(''))
        assert dumped.val() == [{'a': ''}, {'a': ''}, {'a': ''}]
示例#44
0
 def test_nonnull_returns_self(self):
     """A wrapper compares equal to what its nonnull() returns."""
     soup = Soupy('')
     checked = soup.nonnull()
     assert checked == soup
示例#45
0
 def test_scalar_properties(self, attr):
     """Scalar property ``attr`` unwraps to the raw element's attribute."""
     c_node = Soupy('<a class="foo"><b><c>test</c></b></a>').find('c')
     c_raw = c_node.val()
     wrapped_value = getattr(c_node, attr)
     assert wrapped_value.val() == getattr(c_raw, attr)
示例#46
0
 def test_find_multi_fail(self, method):
     """Finder ``method`` gives a zero-length result when nothing matches."""
     doc = Soupy('<a class="test">test</a>')
     finder = getattr(doc, method)
     matches = finder('b')
     assert len(matches) == 0
示例#47
0
    def test_dump_with_multi_map(self):
        """Text is converted to int and then doubled by two chained maps."""
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')
        expected = [{'a': 2}, {'a': 4}, {'a': 6}]

        to_int = Q.text.map(int)
        doubled = to_int.map(lambda x: x * 2)
        result = soup.find_all('a').dump(a=doubled).val()
        assert result == expected
示例#48
0
 def test_call(self):
     """The wrapper is callable and mirrors calling the wrapped value."""
     soup = Soupy('<a class="test">test</a>')
     wrapped_hits = soup('a')
     assert wrapped_hits.val() == soup.val()('a')
示例#49
0
 def test_node_properties(self, attr):
     """Node property ``attr`` unwraps to the raw element's attribute."""
     wrapped = Soupy('<b><d></d><c>test</c><d></d></b>').find('c')
     element = wrapped.val()
     from_wrapper = getattr(wrapped, attr).val()
     from_element = getattr(element, attr)
     assert from_wrapper == from_element
示例#50
0
 def setup_method(self, method):
     """Parse the shared fixture document before each test."""
     html = '<html><body><a>1</a><a>2</a><a>3</a></body></html>'
     self.node = Soupy(html)
示例#51
0
    def test_either(self):
        """either() falls through to the second query when the first misses."""
        doc = Soupy('<a><b>1</b></a><a>2</a>')

        c_then_b = either(Q.find('c').text, Q.find('b').text)
        outcome = doc.apply(c_then_b)
        assert outcome.val() == '1'
示例#52
0
    def test_either(self):
        """With no ``c`` tag present, either() yields the text of ``b``."""
        soup = Soupy('<a><b>1</b></a><a>2</a>')

        missing = Q.find('c').text
        present = Q.find('b').text
        picked = soup.apply(either(missing, present)).val()
        assert picked == '1'
示例#53
0
 def test_node_properties(self, attr):
     """Reading ``attr`` via the wrapper matches reading it on the raw element."""
     c_node = Soupy('<b><d></d><c>test</c><d></d></b>').find('c')
     c_raw = c_node.val()
     wrapped_value = getattr(c_node, attr)
     assert wrapped_value.val() == getattr(c_raw, attr)
示例#54
0
    def test_either_fallback(self):
        """When both alternatives miss, applying either() gives a Null."""
        soup = Soupy('<a><b>1</b></a><a>2</a>')

        first_try = Q.find('d').text
        second_try = Q.find('d').text
        applied = soup.apply(either(first_try, second_try))
        assert isinstance(applied, Null)
示例#55
0
文件: test_soupy.py 项目: nsfmc/soupy
    def test_repr_unicode(self):
        """A document with non-ASCII text can be printed in every form."""
        doc = Soupy('<html>∂ƒ</html>')
        print(doc)
        representation = repr(doc)
        print(representation)
        print(text_type(doc))
示例#56
0
 def test_find_single_fail(self, method):
     """Finder ``method`` gives a NullNode when nothing matches."""
     doc = Soupy('<a class="test">test</a>')
     finder = getattr(doc, method)
     outcome = finder('b')
     assert isinstance(outcome, NullNode)