def test_from_html_with_iframe_src(self):
    """A URL used directly as an iframe ``src`` appears in the output.

    The fixture embeds an ``<iframe>`` whose ``src`` attribute is a plain
    YouTube watch URL; the test asserts that this URL is contained in the
    value returned by ``url.from_html``.
    """
    expected_link = 'http://www.youtube.com/watch?v=fPxUIz5GHAE'
    page_url = 'https://www.revealnews.org/article/a-brief-history-of-the-modern-strawberry/'
    # Adjacent literals are joined at compile time; every piece carries its
    # own leading space so the fixture stays a single space-separated line.
    markup = (
        ' <div id="content_body" itemprop="articleBody">'
        ' <div id="dropped_media_355_84" class="mceNonEditable embedded_content'
        ' embedded_full_width basic-caption">'
        ' <figure>'
        ' <div class="flex-video"><iframe class="embedly-embed"'
        ' src="http://www.youtube.com/watch?v=fPxUIz5GHAE"'
        ' width="854" height="480" frameborder="0" scrolling="no"'
        ' allowfullscreen="allowfullscreen"></iframe></div>'
        ' </figure>'
        ' </div>'
        ' </div>'
        ' </div>'
        ' '
    )
    links = url.from_html(markup, source=page_url)
    assert expected_link in links
def test_from_html_with_iframe_src(self):
    """Verify that an iframe's direct ``src`` URL is among the extracted links.

    Feeds ``url.from_html`` a fragment containing an ``<iframe>`` pointing
    straight at a YouTube watch page and asserts that the same URL is found
    in the result.

    NOTE(review): this test name is defined more than once in the visible
    source; if both definitions live in the same class, the later one
    shadows the earlier -- confirm and deduplicate.
    """
    article_url = 'https://www.revealnews.org/article/a-brief-history-of-the-modern-strawberry/'
    # Built from adjacent literals (each starting with its separating space)
    # so the resulting text is one line with single-space separators.
    fragment = (
        ' <div id="content_body" itemprop="articleBody">'
        ' <div id="dropped_media_355_84" class="mceNonEditable embedded_content'
        ' embedded_full_width basic-caption">'
        ' <figure>'
        ' <div class="flex-video"><iframe class="embedly-embed"'
        ' src="http://www.youtube.com/watch?v=fPxUIz5GHAE"'
        ' width="854" height="480" frameborder="0" scrolling="no"'
        ' allowfullscreen="allowfullscreen"></iframe></div>'
        ' </figure>'
        ' </div>'
        ' </div>'
        ' </div>'
        ' '
    )
    extracted = url.from_html(fragment, source=article_url)
    assert 'http://www.youtube.com/watch?v=fPxUIz5GHAE' in extracted
def test_from_html_with_embed_redirect(self):
    """The target of an embedly redirect iframe appears in the output.

    The iframe ``src`` here is a ``cdn.embedly.com`` widget URL that carries
    the YouTube watch URL percent-encoded in its ``url`` query parameter.
    The test asserts that the decoded watch URL is contained in the value
    returned by ``url.from_html``.
    """
    expected_link = 'http://www.youtube.com/watch?v=fPxUIz5GHAE'
    page_url = 'https://www.revealnews.org/article/a-brief-history-of-the-modern-strawberry/'
    # Adjacent literals are joined at compile time. Pieces that begin with a
    # space mark a whitespace boundary in the fixture; the query-string
    # pieces ('?url=', '&src=', ...) continue the iframe src with no gap.
    markup = (
        ' <div id="content_body" itemprop="articleBody">'
        ' <div id="dropped_media_355_84" class="mceNonEditable embedded_content'
        ' embedded_full_width basic-caption">'
        ' <figure>'
        ' <div class="flex-video"><iframe class="embedly-embed"'
        ' src="//cdn.embedly.com/widgets/media.html'
        '?url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DfPxUIz5GHAE'
        '&src=http%3A%2F%2Fwww.youtube.com%2Fembed%2FfPxUIz5GHAE'
        '&type=text%2Fhtml'
        '&key=1b74e47c9db441f8a998fb6138abca72'
        '&schema=youtube"'
        ' width="854" height="480" frameborder="0" scrolling="no"'
        ' allowfullscreen="allowfullscreen"></iframe></div>'
        ' </figure>'
        ' </div>'
        ' <p>From cereal to ice cream to cocktails, it seems that strawberries'
        ' are served with just about everything.'
        " But it wasn't always this way."
        ' Today, Americans eat four times as many strawberries as they did'
        ' 40 years ago.'
        ' This short stop-motion animation explains how clever advertising'
        ' tactics and certain pesticides helped make the juicy red fruit'
        ' cheaply and widely available.'
        ' There are, however, hidden costs to using these chemicals.</p>'
        ' <div class="edit-credits">'
        ' <p>Director and Producer: Ariane Wu<br>'
        ' </div>'
        ' </div>'
        ' '
    )
    links = url.from_html(markup, source=page_url)
    assert expected_link in links
def test_from_html_with_embed_redirect(self):
    """Verify that an embedly widget iframe yields the wrapped YouTube URL.

    The fragment's iframe points at ``cdn.embedly.com/widgets/media.html``
    with the real video address percent-encoded in the ``url`` query
    parameter; the assertion requires the decoded watch URL to be present
    in what ``url.from_html`` returns.

    NOTE(review): this test name is defined more than once in the visible
    source; if both definitions live in the same class, the later one
    shadows the earlier -- confirm and deduplicate.
    """
    article_url = 'https://www.revealnews.org/article/a-brief-history-of-the-modern-strawberry/'
    # One logical line of HTML, assembled from adjacent literals. A leading
    # space on a piece is the separator from the previous piece; pieces
    # without one (the query parameters) are glued on directly.
    fragment = (
        ' <div id="content_body" itemprop="articleBody">'
        ' <div id="dropped_media_355_84" class="mceNonEditable embedded_content'
        ' embedded_full_width basic-caption">'
        ' <figure>'
        ' <div class="flex-video"><iframe class="embedly-embed"'
        ' src="//cdn.embedly.com/widgets/media.html'
        '?url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DfPxUIz5GHAE'
        '&src=http%3A%2F%2Fwww.youtube.com%2Fembed%2FfPxUIz5GHAE'
        '&type=text%2Fhtml'
        '&key=1b74e47c9db441f8a998fb6138abca72'
        '&schema=youtube"'
        ' width="854" height="480" frameborder="0" scrolling="no"'
        ' allowfullscreen="allowfullscreen"></iframe></div>'
        ' </figure>'
        ' </div>'
        ' <p>From cereal to ice cream to cocktails, it seems that strawberries'
        ' are served with just about everything.'
        " But it wasn't always this way."
        ' Today, Americans eat four times as many strawberries as they did'
        ' 40 years ago.'
        ' This short stop-motion animation explains how clever advertising'
        ' tactics and certain pesticides helped make the juicy red fruit'
        ' cheaply and widely available.'
        ' There are, however, hidden costs to using these chemicals.</p>'
        ' <div class="edit-credits">'
        ' <p>Director and Producer: Ariane Wu<br>'
        ' </div>'
        ' </div>'
        ' '
    )
    extracted = url.from_html(fragment, source=article_url)
    assert 'http://www.youtube.com/watch?v=fPxUIz5GHAE' in extracted
def get_links(self, body, entry_url):
    """Return the links found in an article body.

    Delegates entirely to ``url.from_html``.

    Args:
        body: Article body handed to ``url.from_html`` as its first
            argument (presumably an HTML string -- confirm against callers).
        entry_url: Forwarded to ``url.from_html`` as the ``source`` keyword.

    Returns:
        Whatever ``url.from_html`` returns for these arguments.
    """
    links = url.from_html(body, source=entry_url)
    return links