def test_empty_scalars_return_nullnode(self):
    """Absent siblings and an absent parent come back as NullNode."""
    anchor = Soupy('<a></a>').find('a')
    for relation in ('next_sibling', 'previous_sibling'):
        assert isinstance(getattr(anchor, relation), NullNode)

    root = Soupy('<a></a>')
    assert isinstance(root.parent, NullNode)
def test_failed_search(self):
    """Dumping the text of a tag that one element lacks raises."""
    soup = Soupy('<a><b>1</b></a><a>2</a>')
    # The second <a> has no <b> child, so the query cannot be resolved there.
    b_text = Q.find('b').text
    with pytest.raises(NullValueError):
        soup.find_all('a').dump(a=b_text)
def test_orelse(self):
    """orelse supplies a default where the queried tag is absent."""
    soup = Soupy('<a><b>1</b></a><a>2</a>')
    b_as_int = Q.find('b').text.map(int).orelse(0)
    dumped = soup.find_all('a').dump(a=b_as_int)
    assert dumped.val() == [{'a': 1}, {'a': 0}]
def test_orelse(self):
    """A failed lookup inside dump falls back to the orelse value."""
    document = Soupy('<a><b>1</b></a><a>2</a>')
    anchors = document.find_all('a')
    values = anchors.dump(a=Q.find('b').text.map(int).orelse(0)).val()
    expected = [{'a': 1}, {'a': 0}]
    assert values == expected
def test_multi_dump(self):
    """dump with two keyword queries yields one two-key dict per element."""
    soup = Soupy('<a val="1">1</a><a>2</a><a val="3">3</a>')
    anchors = soup.find_all('a')
    result = anchors.dump(a=Q.text, b=Q.attrs.get('val')).val()
    expected = [
        {'a': '1', 'b': '1'},
        {'a': '2', 'b': None},  # the middle tag has no ``val`` attribute
        {'a': '3', 'b': '3'},
    ]
    assert result == expected
def test_navstring_dump(self):
    """Bare strings among a tag's contents can be queried like tags."""
    soup = Soupy('<div><a>1</a>2<a>3</a></div>')

    texts = soup.find('div').contents.each(Q.text).val()
    assert texts == ['1', '2', '3']

    # The bare string "2" has no contents of its own, so the fallback is used.
    first_child_text = Q.contents[0].text.orelse('!')
    nested = soup.find('div').contents.each(first_child_text).val()
    assert nested == ['1', '!', '3']
def test_navstring_dump(self):
    """each() works across a mix of tags and bare strings."""
    document = Soupy('<div><a>1</a>2<a>3</a></div>')

    plain = document.find('div').contents.each(Q.text)
    assert plain.val() == ['1', '2', '3']

    # '!' marks the entry whose ``contents[0]`` lookup does not resolve.
    guarded = document.find('div').contents.each(
        Q.contents[0].text.orelse('!'))
    assert guarded.val() == ['1', '!', '3']
def test_simple_dump(self):
    """dump gives dicts for keyword queries and tuples for positional ones.

    Mixing positional and keyword queries in one call raises ValueError.
    """
    soup = Soupy('<a>1</a><a>2</a><a>3</a>')

    by_name = soup.find_all('a').dump(a=Q.text).val()
    assert by_name == [{'a': '1'}, {'a': '2'}, {'a': '3'}]

    by_position = soup.find_all('a').dump(Q.text).val()
    assert by_position == [('1',), ('2',), ('3',)]

    with pytest.raises(ValueError):
        soup.find('a').dump(Q.text, a=Q.text)
def test_find_multi_methods(self, method):
    """The named multi-result finder matches the wrapped node's own result."""
    markup = """ <div> <div></div> <b><div></div></b> <div></div> </div> """
    wrapped = Soupy(markup).find('b')
    raw = wrapped.val()

    expected = getattr(raw, method)('div')
    # Guard against a vacuous comparison of two empty results.
    assert expected

    finder = getattr(wrapped, method)
    actual = finder('div').val()
    assert actual == expected
def test_multi_dump(self):
    """Each element is dumped to a dict holding its text and ``val``."""
    document = Soupy('<a val="1">1</a><a>2</a><a val="3">3</a>')
    text_query = Q.text
    val_query = Q.attrs.get('val')
    rows = document.find_all('a').dump(a=text_query, b=val_query).val()
    assert rows == [
        {'a': '1', 'b': '1'},
        {'a': '2', 'b': None},
        {'a': '3', 'b': '3'},
    ]
def get_chart(self, chart_name):
    """Download one chart page and return its entries as a JSON string.

    ``chart_name`` is lower-cased and looked up in
    ``self.chart_titles_dict``; the number found there is appended to
    ``self.base_url`` to build the page URL, which is fetched with
    ``requests`` and parsed with ``Soupy``.

    Side effects: sets ``self.chart_list``, ``self.chart_name`` and
    ``self.url``.

    Returns:
        ``json.dumps(self.chart_list, indent=3)``, where each entry is a
        ``(('position', n), ('title', t), ('artist', a))`` tuple.

    Raises:
        ValueError: if the chart name is not in ``self.chart_titles_dict``.
    """
    self.chart_list = []
    self.chart_name = chart_name.lower()
    try:
        number = self.chart_titles_dict[self.chart_name]
    except KeyError:
        raise ValueError('That chart does not exist')

    self.url = self.base_url + str(number)
    raw = requests.get(self.url)
    soup = Soupy(raw.text)
    tr_container = soup.find_all('tr', {'class': 'latc_song'})

    pos = 0
    song_title_constant = 2
    song_artist_constant = 3
    for table_row in tr_container:
        children = table_row.children
        # Type produced by this lookup on the row's first child; children
        # whose lookup yields the same type are skipped below.
        # NOTE(review): presumably the first child never holds links, making
        # this the "nothing found" collection type - confirm.
        null_container_holder = type(
            children[0].find('table').find_all('a'))
        for child in children:
            links = child.find('table').find_all('a')
            if type(links) is null_container_holder:
                continue
            # The original wrote ``except NullValueError, NameError:``.  That
            # is Python-2-only syntax meaning "catch NullValueError and bind
            # it to the name NameError" (hence the old ``global`` statements);
            # it is a SyntaxError on Python 3.  The handlers below keep what
            # the old code actually caught, without rebinding NameError.
            try:
                # The position advances even when the row is then skipped.
                pos = pos + 1
                song_title = (
                    links[song_title_constant].contents.first().val().string)
                song_artist = (
                    links[song_artist_constant].contents.first().val().string)
                self.chart_list.append(
                    (('position', pos), ('title', song_title),
                     ('artist', song_artist)))
            except NullValueError:
                print('\n')
            except NameError:
                # Fallback: read title/artist one link earlier.
                song_title = (
                    links[song_title_constant - 1]
                    .contents.first().val().string)
                song_artist = (
                    links[song_artist_constant - 1]
                    .contents.first().val().string)
                self.chart_list.append(
                    (('position', pos), ('title', song_title),
                     ('artist', song_artist)))
    return json.dumps(self.chart_list, indent=3)
def test_repr_unicode(self):
    """Printing, repr() and text conversion cope with non-ASCII markup."""
    wrapped = Soupy('<html>∂ƒ</html>')
    print(wrapped)
    print(repr(wrapped))
    running_py2 = not PY3
    if running_py2:
        # must be ascii-encodable on py2
        assert repr(wrapped).encode('ascii')
    print(text_type(wrapped))
def check_podcast(type, url):
    """Announce the latest episode found at ``url`` if it is a new one.

    The text of the page's first ``<h2>`` is compared with ``videos[type]``.
    When it differs, the text of the element with class ``deck`` is read, a
    message is sent with ``bot.say`` to ``CHANNEL``, the event is logged and
    ``videos[type]`` is updated.

    Args:
        type: key into ``PODCAST_NAMES`` and ``videos`` (the name shadows the
            builtin but is kept for callers passing it by keyword).
        url: page to fetch and inspect.

    Returns:
        True if a new episode was announced, otherwise False - including
        when parsing or announcing fails, which is logged as an error.
    """
    # No ``global`` statement is needed: the module-level names are only
    # read, and ``videos`` is mutated in place rather than rebound.
    page = Soupy(urllib.urlopen(url))
    try:
        namenode = page.find("h2")
        latestname = namenode.text.val()
        if latestname != videos[type]:
            latestdesc = page.find(class_="deck").text.val().strip()
            bot.say(CHANNEL, "[New %s] %s - %s %s" % (PODCAST_NAMES[type],
                                                       latestname,
                                                       latestdesc, url))
            log.info("New %s: %s" % (PODCAST_NAMES[type], latestname))
            videos[type] = latestname
            return True
        return False
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        log.error("Failed checking for latest %s at %s" % (type, url))
        return False
def __init__(self, url, generator=correct_content_generator):
    """Download ``url``, record its title and process its content tags.

    Args:
        url: address passed to ``download``; the result is parsed with
            ``Soupy``.
        generator: callable taking the parsed page and yielding tags; each
            yielded tag is passed to ``self.retrieve_file``.  It is replaced
            by ``only_p_generator`` when ``find_start_tag`` raises NameError.
    """
    self.url = url
    self.tags = []
    self.img_url = []
    soup = Soupy(download(url))
    # ``orelse('')`` covers a page with no <title>: without it ``val()``
    # raises on the failed lookup, so the 'Lorem Ipsum' fallback would only
    # ever apply to an empty title.
    self.title = soup.find('title').text.orelse('').val() or 'Lorem Ipsum'
    self.safe_title = safe_chars(self.title)
    try:
        find_start_tag(soup)
    except NameError:
        # NOTE(review): presumably find_start_tag signals "no start tag"
        # with NameError - confirm against its definition.
        generator = only_p_generator
    for tag in generator(soup):
        self.retrieve_file(tag)
def check_podcast(type, url):
    """Check ``url`` for a new episode and announce it when found.

    Reads the text of the first ``<h2>`` on the page and compares it with
    ``videos[type]``.  If they differ, the text of the element with class
    ``deck`` is fetched, ``bot.say`` posts a message to ``CHANNEL``, an info
    line is logged and ``videos[type]`` is set to the new name.

    Args:
        type: key used for ``PODCAST_NAMES`` and ``videos`` (shadows the
            builtin; the name is kept so keyword callers keep working).
        url: page to download.

    Returns:
        True when a new episode was announced; False when nothing changed
        or when the check failed (the failure is logged as an error).
    """
    # The old ``global`` statement was dropped: nothing here rebinds a
    # module-level name (``videos`` is only updated by item assignment).
    page = Soupy(urllib.urlopen(url))
    try:
        namenode = page.find("h2")
        latestname = namenode.text.val()
        if latestname != videos[type]:
            latestdesc = page.find(class_="deck").text.val().strip()
            bot.say(
                CHANNEL, "[New %s] %s - %s %s" %
                (PODCAST_NAMES[type], latestname, latestdesc, url))
            log.info("New %s: %s" % (PODCAST_NAMES[type], latestname))
            videos[type] = latestname
            return True
        return False
    except Exception:
        # Narrowed from a bare ``except`` so interpreter-exit signals
        # (KeyboardInterrupt, SystemExit) still propagate.
        log.error("Failed checking for latest %s at %s" % (type, url))
        return False
def get_chart(self, chart_name):
    """Fetch a chart by name and return its songs serialised as JSON.

    The lower-cased ``chart_name`` is looked up in
    ``self.chart_titles_dict``; the resulting number is appended to
    ``self.base_url``, the page is fetched with ``requests`` and parsed with
    ``Soupy``.

    Side effects: assigns ``self.chart_list``, ``self.chart_name`` and
    ``self.url``.

    Returns:
        ``json.dumps(self.chart_list, indent=3)``; every entry is a
        ``(('position', n), ('title', t), ('artist', a))`` tuple.

    Raises:
        ValueError: if ``chart_name`` is not a key of
            ``self.chart_titles_dict``.
    """
    self.chart_list = []
    self.chart_name = chart_name.lower()
    try:
        number = self.chart_titles_dict[self.chart_name]
    except KeyError:
        raise ValueError('That chart does not exist')

    self.url = self.base_url + str(number)
    raw = requests.get(self.url)
    soup = Soupy(raw.text)
    tr_container = soup.find_all('tr', {'class': 'latc_song'})

    pos = 0
    song_title_constant = 2
    song_artist_constant = 3
    for table_row in tr_container:
        children = table_row.children
        # Reference type: what the link lookup returns for the first child.
        # Children giving the same type are skipped in the loop below.
        # NOTE(review): looks like this is the empty-result collection
        # type - verify.
        null_container_holder = type(children[0].find('table').find_all('a'))
        for child in children:
            links = child.find('table').find_all('a')
            if type(links) is null_container_holder:
                continue
            # ``except NullValueError, NameError:`` in the original is valid
            # only on Python 2, where it catches NullValueError and stores
            # the instance under the name NameError (the reason for the old
            # ``global NameError``).  Python 3 rejects it outright.  The two
            # handlers below catch exactly what the old code caught and
            # leave the builtin NameError untouched.
            try:
                # Incremented before extraction, so a skipped row still
                # consumes a position number.
                pos = pos + 1
                title_link = links[song_title_constant]
                artist_link = links[song_artist_constant]
                song_title = title_link.contents.first().val().string
                song_artist = artist_link.contents.first().val().string
                self.chart_list.append((('position', pos),
                                        ('title', song_title),
                                        ('artist', song_artist)))
            except NullValueError:
                print('\n')
            except NameError:
                # Fallback: take title/artist from the preceding links.
                title_link = links[song_title_constant - 1]
                artist_link = links[song_artist_constant - 1]
                song_title = title_link.contents.first().val().string
                song_artist = artist_link.contents.first().val().string
                self.chart_list.append((('position', pos),
                                        ('title', song_title),
                                        ('artist', song_artist)))
    return json.dumps(self.chart_list, indent=3)
def test_simple_dump(self):
    """Keyword dump -> dicts, positional dump -> tuples, mixing -> error."""
    document = Soupy('<a>1</a><a>2</a><a>3</a>')

    as_dicts = document.find_all('a').dump(a=Q.text)
    assert as_dicts.val() == [{'a': '1'}, {'a': '2'}, {'a': '3'}]

    as_tuples = document.find_all('a').dump(Q.text)
    assert as_tuples.val() == [('1',), ('2',), ('3',)]

    first_anchor_dump = document.find('a').dump
    with pytest.raises(ValueError):
        first_anchor_dump(Q.text, a=Q.text)
def test_dump_with_map(self):
    """A map step inside a dump query converts each dumped value."""
    soup = Soupy('<a>1</a><a>2</a><a>3</a>')
    text_as_int = Q.text.map(int)
    dumped = soup.find_all('a').dump(a=text_as_int).val()
    expected = [{'a': 1}, {'a': 2}, {'a': 3}]
    assert dumped == expected
def test_dump_with_getitem(self):
    """Item access on a query (``Q.attrs[...]``) works inside dump."""
    soup = Soupy('<a val="1">1</a>')
    val_attr = Q.attrs["val"]
    dumped = soup.find_all('a').dump(a=val_attr).val()
    assert dumped == [{'a': "1"}]
def test_simple_dump(self):
    """dump with one keyword query yields one single-key dict per tag."""
    soup = Soupy('<a>1</a><a>2</a><a>3</a>')
    anchors = soup.find_all('a')
    dumped = anchors.dump(a=Q.text).val()
    expected = [{'a': '1'}, {'a': '2'}, {'a': '3'}]
    assert dumped == expected
def setup_method(self, method):
    """Store a parsed three-anchor document on ``self.node``."""
    markup = '<html><body><a>1</a><a>2</a><a>3</a></body></html>'
    self.node = Soupy(markup)
def test_prettify(self):
    """prettify() on the wrapper equals prettify() on the wrapped value."""
    wrapped = Soupy('<html>∂ƒ</html>')
    from_wrapper = wrapped.prettify()
    from_dom = wrapped.val().prettify()
    assert from_wrapper == from_dom
def test_call(self):
    """Calling the wrapper mirrors calling the wrapped object."""
    wrapped = Soupy('<a class="test">test</a>')
    via_wrapper = wrapped('a').val()
    via_dom = wrapped.val()('a')
    assert via_wrapper == via_dom
def test_iter(self):
    """Iterating the wrapper yields wrapped versions of the raw items."""
    wrapped = Soupy('<a class="test">test</a>')
    raw = wrapped.val()
    for wrapped_item, raw_item in zip(wrapped, raw):
        unwrapped = wrapped_item.val()
        assert unwrapped == raw_item
def test_dump_with_map(self):
    """Text mapped through ``int`` is dumped as integers."""
    document = Soupy('<a>1</a><a>2</a><a>3</a>')
    anchors = document.find_all('a')
    rows = anchors.dump(a=Q.text.map(int))
    assert rows.val() == [{'a': 1}, {'a': 2}, {'a': 3}]
def test_dump_with_getitem(self):
    """An attribute looked up by subscript can be dumped."""
    document = Soupy('<a val="1">1</a>')
    anchors = document.find_all('a')
    rows = anchors.dump(a=Q.attrs["val"])
    expected = [{'a': "1"}]
    assert rows.val() == expected
def test_dump_with_method(self):
    """A method call in a dump query falls back via orelse when it fails."""
    document = Soupy('<a>1</a><a>2</a><a>3</a>')
    # None of the anchors contains a <b>, so every row gets the default.
    b_or_blank = Q.find('b').orelse('')
    rows = document.find_all('a').dump(a=b_or_blank).val()
    assert rows == [{'a': ''}, {'a': ''}, {'a': ''}]
def test_simple_dump(self):
    """Every anchor's text is dumped under the key ``a``."""
    document = Soupy('<a>1</a><a>2</a><a>3</a>')
    text_query = Q.text
    rows = document.find_all('a').dump(a=text_query)
    assert rows.val() == [{'a': '1'}, {'a': '2'}, {'a': '3'}]
def test_scalar_properties(self, attr):
    """The wrapper's ``attr`` unwraps to the wrapped node's ``attr``."""
    wrapped = Soupy('<a class="foo"><b><c>test</c></b></a>').find('c')
    raw = wrapped.val()
    via_wrapper = getattr(wrapped, attr).val()
    via_dom = getattr(raw, attr)
    assert via_wrapper == via_dom
def test_collection_properties(self, attr):
    """A collection-valued ``attr`` lists the same items as on the raw node."""
    wrapped = Soupy('<a class="foo"><b><c>test</c></b></a>').find('b')
    raw = wrapped.val()
    via_wrapper = list(getattr(wrapped, attr).val())
    via_dom = list(getattr(raw, attr))
    assert via_wrapper == via_dom
def return_all_links(url):
    """Return the ``<a>`` tags of the page at ``url`` that pass
    ``is_tag_not_anchor``, in document order."""
    page = Soupy(download(url))
    kept = []
    for candidate in page.find_all('a'):
        if is_tag_not_anchor(candidate):
            kept.append(candidate)
    return kept
def test_repr_unicode(self):
    """A Collection holding non-ASCII markup can be printed and repr'd."""
    members = [Soupy('<html>∂ƒ</html>')]
    collection = Collection(members)
    print(collection)
    print(repr(collection))
    print(text_type(collection))
def test_dump_with_multi_map(self):
    """Chained map steps in a dump query are applied in order."""
    def double(x):
        return x * 2

    soup = Soupy('<a>1</a><a>2</a><a>3</a>')
    doubled_int = Q.text.map(int).map(double)
    rows = soup.find_all('a').dump(a=doubled_int).val()
    assert rows == [{'a': 2}, {'a': 4}, {'a': 6}]
class TestCollection(object): def setup_method(self, method): self.node = Soupy('<html><body><a>1</a><a>2</a><a>3</a></body></html>') def test_slice(self): node = self.node dom = node.val() assert isinstance(node.children[::2], Collection) assert node.children[::2].val() == list(dom.children)[::2] def test_slice_on_iterator(self): c = Collection((Scalar(i) for i in range(5))) assert c[::2].val() == [0, 2, 4] def test_get_single(self): node = self.node.find('body') dom = node.val() assert node.children[1].val() == dom.contents[1] def test_get_single_on_iterator(self): c = Collection((Scalar(i) for i in range(5))) assert c[2].val() == 2 def test_map(self): node = self.node assert node.find_all('a').map(len).val() == 3 def test_first(self): node = self.node assert node.find_all('a').first().text.val() == '1' def test_first_empty(self): node = self.node assert isinstance(node.find_all('x').first(), NullNode) def test_each(self): node = self.node result = node.find_all('a').each(Q.text.map(int)).val() assert result == [1, 2, 3] def test_multi_each(self): node = self.node result = node.find_all('a').each(Q.text.map(int), Q.text).val() assert result == [(1, '1'), (2, '2'), (3, '3')] def test_filter(self): node = self.node result = node.find_all('a').filter(Q.text.map(int) > 1).val() assert len(result) == 2 def test_filter(self): node = self.node result = node.find_all('a').exclude(Q.text.map(int) > 1).val() assert len(result) == 1 def test_filter_noarg(self): node = self.node result = node.find_all('a').each(Q.text.map(int) > 1).filter().val() assert len(result) == 2 def test_takewhile(self): node = self.node result = node.find_all('a').takewhile(Q.text.map(int) < 2).val() assert len(result) == 1 def test_dropwhile(self): node = self.node result = node.find_all('a').dropwhile(Q.text.map(int) < 2).val() assert len(result) == 2 def test_index_oob(self): assert isinstance(Collection([])[5], NullNode) def test_bool(self): assert Collection([Scalar(1)]) assert not 
Collection([]) def test_count(self): assert Collection([]).count().val() == 0 assert Collection([Scalar(1)]).count().val() == 1 assert NullCollection().count().val() == 0 def test_repr_unicode(self): s = Collection([Soupy('<html>∂ƒ</html>')]) print(s) print(repr(s)) print(text_type(s)) def test_zip(self): c1 = Collection(map(Scalar, [1, 2, 3])) c2 = c1.each(Q * 2) c3 = c1.zip(c2) assert c3.val() == [(1, 2), (2, 4), (3, 6)] with pytest.raises(ValueError): c1.zip([1, 2]) def test_dictzip(self): c = Collection([Scalar(1), Scalar(2)]) result = c.dictzip(['a', 'b']) expected = {'a': 1, 'b': 2} assert result.val() == expected lbls = Collection([Scalar('a'), Scalar('b')]) assert c.dictzip(lbls).val() == expected def test_list(self): items = list(map(Scalar, [1, 2])) assert list(Collection(items)) == items def test_typecheck(self): """ Collections must contain wrappers """ with pytest.raises(TypeError): Collection([1]) def test_all(self): c = Collection(map(Scalar, [True, False])) assert c.any().val() assert not c.all().val() assert not c.none().val() c = Collection(map(Scalar, [True, True])) assert c.any().val() assert c.all().val() assert not c.none().val() c = Collection(map(Scalar, [False, False])) assert not c.any().val() assert not c.all().val() assert c.none().val() c = Collection([]) assert not c.any().val() assert c.none().val() assert c.all().val() # this is python's behavior for empty lists
def test_nonnull_returns_self(self):
    """nonnull() on a non-null wrapper compares equal to that wrapper."""
    wrapped = Soupy('')
    checked = wrapped.nonnull()
    assert checked == wrapped
def test_failed_search(self):
    """dump raises NullValueError when a query fails for some element."""
    document = Soupy('<a><b>1</b></a><a>2</a>')
    # Only the first <a> contains a <b>.
    missing_for_second = Q.find('b').text
    with pytest.raises(NullValueError):
        document.find_all('a').dump(a=missing_for_second)
def test_either_fallback(self):
    """either() yields a Null when both alternatives fail."""
    soup = Soupy('<a><b>1</b></a><a>2</a>')
    both_missing = either(Q.find('d').text, Q.find('d').text)
    outcome = soup.apply(both_missing)
    assert isinstance(outcome, Null)
def test_dump_with_method(self):
    """Failed ``find`` calls inside dump are replaced by the orelse value."""
    soup = Soupy('<a>1</a><a>2</a><a>3</a>')
    anchors = soup.find_all('a')
    dumped = anchors.dump(a=Q.find('b').orelse(''))
    expected = [{'a': ''}, {'a': ''}, {'a': ''}]
    assert dumped.val() == expected
def test_find_multi_fail(self, method):
    """A multi-result finder with no match returns something of length 0."""
    wrapped = Soupy('<a class="test">test</a>')
    finder = getattr(wrapped, method)
    matches = finder('b')
    assert len(matches) == 0
def test_node_properties(self, attr):
    """A node-valued ``attr`` unwraps to the raw node's ``attr``."""
    wrapped = Soupy('<b><d></d><c>test</c><d></d></b>').find('c')
    raw = wrapped.val()
    via_wrapper = getattr(wrapped, attr).val()
    via_dom = getattr(raw, attr)
    assert via_wrapper == via_dom
def test_either(self):
    """either() uses the second query when the first one fails."""
    soup = Soupy('<a><b>1</b></a><a>2</a>')
    # There is no <c>, so the <b> alternative supplies the value.
    c_then_b = either(Q.find('c').text, Q.find('b').text)
    outcome = soup.apply(c_then_b).val()
    assert outcome == '1'
def test_either_fallback(self):
    """With no alternative succeeding, the applied either() is a Null."""
    document = Soupy('<a><b>1</b></a><a>2</a>')
    first_try = Q.find('d').text
    second_try = Q.find('d').text
    applied = document.apply(either(first_try, second_try))
    assert isinstance(applied, Null)
def test_repr_unicode(self):
    """Printing, repr() and text conversion accept non-ASCII markup."""
    wrapped = Soupy('<html>∂ƒ</html>')
    print(wrapped)
    print(repr(wrapped))
    print(text_type(wrapped))
def test_find_single_fail(self, method):
    """A single-result finder with no match returns a NullNode."""
    wrapped = Soupy('<a class="test">test</a>')
    finder = getattr(wrapped, method)
    outcome = finder('b')
    assert isinstance(outcome, NullNode)