def test_oth3(self):
    """Ars Technica article credited to four authors."""
    url = (
        'https://arstechnica.com/science/2007/09/'
        'the-pseudoscience-behind-homeopathy/'
    )
    expected = (
        '* {{cite web '
        '| last=Timmer '
        '| first=John '
        '| last2=Ford '
        '| first2=Matt '
        '| last3=Lee '
        '| first3=Chris '
        '| last4=Gitlin '
        '| first4=Jonathan '
        '| title=Diluting the scientific method: Ars looks at homeopathy '
        '| website=Ars Technica '
        '| date=2007-09-12 '
        '| url=https://arstechnica.com/science/2007/09/'
        'the-pseudoscience-behind-homeopathy/ '
        '| access-date='
    )
    # The second item of the scraper result is checked against the
    # expected cite-web text; the access date itself is left open.
    self.assertIn(expected, urls_scr(url)[1])
def test_dm1(self):
    """Daily Mail article with four authors: check sfn and citation."""
    result = urls_scr(
        'http://www.dailymail.co.uk/news/article-2633025/'
        'London-cleric-convicted-NYC-terrorism-trial.html'
    )
    expected_sfn = '{{sfn | Malm | Witheridge | Drury | Bates | 2014}}'
    expected_citation = (
        '* {{cite web '
        '| last=Malm '
        '| first=Sara '
        '| last2=Witheridge '
        '| first2=Annette '
        '| last3=Drury '
        '| first3=Ian '
        '| last4=Bates '
        '| first4=Daniel '
        '| title=Abu Hamza found guilty in US court of helping'
        ' Al-Qaeda terrorists '
        '| website=Daily Mail Online '
        '| date=2014-05-19 '
        '| url=http://www.dailymail.co.uk/news/article-2633025/'
        'London-cleric-convicted-NYC-terrorism-trial.html '
        '| access-date='
    )
    # The short footnote must match exactly; the citation is only
    # checked as a prefix-like substring because the access date varies.
    self.assertEqual(expected_sfn, result[0])
    self.assertIn(expected_citation, result[1])
def test_nyt2(self):
    """New-style NYT page, two authors; requested with a ``?hp`` query.

    The expected ``url`` field does not contain the ``?hp`` suffix.
    """
    result = urls_scr(
        'https://www.nytimes.com/2014/05/31/sports/basketball/'
        'steven-a-ballmers-2-billion-play-for-clippers-is-a-big-bet-'
        'on-the-nba.html?hp'
    )
    expected = (
        '* {{cite web '
        '| last=Belson '
        '| first=Ken '
        '| last2=Sandomir '
        '| first2=Richard '
        '| title=$2 Billion for Clippers? In Time, '
        'It May Be a Steal for Steve Ballmer '
        '| website=The New York Times '
        '| date=2014-05-30 '
        '| url=https://www.nytimes.com/2014/05/31/sports/basketball/'
        'steven-a-ballmers-2-billion-play-for-clippers-is-a-big-bet-on-'
        'the-nba.html '
        '| access-date='
    )
    self.assertIn(expected, result[1])
def test_citation_author_reverse_order(self):
    """Check citation_author detection on an arXiv abstract page.

    The first and last names appear in reverse order in the source;
    the citation must still assign them to ``first``/``last`` correctly.
    """
    url = (
        'https://arxiv.org/abs/1608.05006?utm_medium=email&utm_source='
        'other&utm_campaign=opencourse.GdeNrll1EeSROyIACtiVvg.'
        'announcements%257Eopencourse.GdeNrll1EeSROyIACtiVvg.'
        '4xDVKzx5EeeJjRJrkGD1dA'
    )
    expected = (
        '* {{cite web '
        '| last=Hartman '
        '| first=JudithAnn R. '
        '| last2=Nelson '
        '| first2=Eric A. '
        '| title=Automaticity in Computation and Student Success in '
        'Introductory Physical Science Courses '
        '| website=arXiv.org e-Print archive '
        '| date=2016-08-17 '
        '| url=https://arxiv.org/abs/1608.05006?utm_medium=email&'
        'utm_source=other&utm_campaign=opencourse.GdeNrll1EeSROyIACtiVvg.'
        'announcements%257Eopencourse.GdeNrll1EeSROyIACtiVvg.'
        '4xDVKzx5EeeJjRJrkGD1dA '
        '| access-date='
    )
    citation = urls_scr(url)[1]
    self.assertIn(expected, citation)
def test_oth12(self):
    """Times of India: the short footnote must name Kashyap and 2001."""
    # NOTE(review): the earlier docstring said the author "could not be
    # detected" -- presumably a past failure this test guards against.
    # NOTE(review): another test in this file is also named test_oth12;
    # if both live in the same class, the later definition wins.
    url = (
        'http://timesofindia.indiatimes.com/city/pune/'
        'UK-allows-working-visas-for-Indian-students/'
        'articleshow/1163528927.cms?'
    )
    expected_sfn = "{{sfn | Kashyap | 2001}}"
    result = urls_scr(url)
    self.assertIn(expected_sfn, result[0])
def google_encrypted_scr(url, parsed_url, date_format):
    """Route a Google URL to the books scraper or the generic scraper.

    The third item of ``parsed_url`` is inspected: when it is exactly
    ``/books`` or begins with ``/books/`` the request is handed to
    ``googlebooks_scr``; anything else goes to ``urls_scr``.

    Sample book URLs:
        https://encrypted.google.com/books?id=6upvonUt0O8C
        https://www.google.com/books?id=bwfoCAAAQBAJ&pg=PA32
        https://www.google.com/books/edition/_/bwfoCAAAQBAJ?gbpv=1&pg=PA32
    """
    # Seven characters are enough to tell '/books' (whole path) and
    # '/books/...' apart from look-alikes such as '/booksearch'.
    path_head = parsed_url[2][:7]
    if path_head not in ('/books', '/books/'):
        return urls_scr(url, date_format)
    return googlebooks_scr(parsed_url, date_format)
def test_single_line_meta_tags():
    """Regression test for issue #9, exercised with a CNN article."""
    result = urls_scr(
        'https://edition.cnn.com/'
        '2014/02/09/us/north-carolina-coal-ash-spill/'
    )
    expected = (
        "* {{cite web | last=Shoichet | first=Catherine E. "
        "| title=Spill spews tons of coal ash into North Carolina's "
        "Dan River | website=CNN | date=2014-02-09 "
        "| url=http://www.cnn.com/2014/02/09/us/north-carolina-coal-ash"
        "-spill/index.html | access-date="
    )
    assert expected in result[1]
def test_indaily(self):
    """InDaily article with a single author."""
    url = (
        'https://indaily.com.au/news/2020/03/19/'
        'epidemics-expert-contradicts-marshalls-schools-advice/'
    )
    expected = (
        "* {{cite web | last=Siebert | first=Bension "
        "| title=Epidemics expert questions Marshall's schools advice "
        "| website=InDaily | date=2020-03-19 "
        "| url=https://indaily.com.au/news/2020/03/19/"
        "epidemics-expert-contradicts-marshalls-schools-advice/ "
        "| access-date="
    )
    self.assertIn(expected, urls_scr(url)[1])
def test_reverse_name():
    """The page gives the author as `Martin, Tracy`.

    Tracy must end up as the first name and Martin as the last name.
    """
    url = 'http://www.highbeam.com/doc/1P3-3372742961.html'
    expected = (
        '* {{cite web '
        '| last=Martin '
        '| first=Tracy '
        '| title=Dynamometers Explained '
        '| website=HighBeam Research '
        '| date=2014-07-01 '
        '| url=http://www.highbeam.com/doc/1P3-3372742961.html '
        '| access-date='
    )
    assert expected in urls_scr(url)[1]
def test_bbc5():
    """news.bbc.co.uk business story with one author."""
    url = 'http://news.bbc.co.uk/2/hi/business/2570109.stm'
    citation = urls_scr(url)[1]
    expected = (
        "* {{cite web "
        "| last=Madslien "
        "| first=Jorn "
        "| title=Inside the Bentley factory "
        "| website=BBC NEWS "
        "| date=2002-12-24 "
        "| url=http://news.bbc.co.uk/2/hi/business/2570109.stm "
        "| access-date="
    )
    assert expected in citation
def test_abc_author():
    """ABC (Australia) news story: the author must be detected."""
    url = (
        'https://www.abc.net.au/news/2020-09-06/'
        'glow-worms-in-wollemi-national-park-survived-summer-bushfire/'
        '12634762'
    )
    expected = (
        '* {{cite web | last=Ferguson | first=Kathleen '
        '| title=Glow worms in Wollemi National Park survived Gospers '
        'Mountain bushfire - ABC News '
        '| website=ABC (Australian Broadcasting Corporation) '
        '| date=2020-09-06 | url=https://www.abc.net.au/news/2020-09-06/'
        'glow-worms-in-wollemi-national-park-survived-summer-bushfire/'
        '12634762 | access-date='
    )
    result = urls_scr(url)
    assert expected in result[1]
def test_bbc2():
    """bbc.com article (http) with one author."""
    url = 'http://www.bbc.com/news/science-environment-23814524'
    expected = (
        '* {{cite web '
        '| last=Gage '
        '| first=Suzi '
        '| title=Sea otter return boosts ailing seagrass in California '
        '| website=BBC News '
        '| date=2013-08-26 '
        '| url=http://www.bbc.com/news/science-environment-23814524 '
        '| access-date='
    )
    assert expected in urls_scr(url)[1]
def test_bbc1(self):
    """BBC article without authors.

    With no author available the citation is expected to carry a
    ``ref={{sfnref | BBC News | 2014}}`` field instead.
    """
    citation = urls_scr('https://www.bbc.com/news/world-asia-27653361')[1]
    # The page is requested over https, while the expected url field
    # uses the http scheme.
    expected = (
        "* {{cite web "
        "| title=US 'received Qatar assurances' on Afghan prisoner deal "
        "| website=BBC News "
        "| date=2014-06-01 "
        "| url=http://www.bbc.com/news/world-asia-27653361 "
        "| ref={{sfnref | BBC News | 2014}} "
        "| access-date="
    )
    self.assertIn(expected, citation)
def test_oth7():
    """MIT News page with a `By Topic` line and ' | ' inside the byline."""
    url = (
        'http://news.mit.edu/2014/'
        'traffic-lights-theres-a-better-way-0707'
    )
    expected = (
        '* {{cite web '
        '| last=Chandler '
        '| first=David L. '
        '| title=Traffic lights: There’s a better way '
        '| website=MIT News '
        '| date=2014-07-07 '
        '| url=http://news.mit.edu/2014/'
        'traffic-lights-theres-a-better-way-0707 '
        '| access-date='
    )
    result = urls_scr(url)
    assert expected in result[1]
def test_bbc4():
    """news.bbc.co.uk Newsnight story with one author."""
    url = 'http://news.bbc.co.uk/2/hi/programmes/newsnight/5178122.stm'
    expected = (
        "* {{cite web "
        "| last=Jones "
        "| first=Meirion "
        "| title=Malaria advice 'risks lives' "
        "| website=BBC NEWS "
        "| date=2006-07-13 "
        "| url="
        "http://news.bbc.co.uk/2/hi/programmes/newsnight/5178122.stm "
        "| access-date="
    )
    citation = urls_scr(url)[1]
    assert expected in citation
def test_empty_meta_author_content():
    """An empty author meta tag must not produce a malformed citation.

    The expected citation has no author fields and uses an ``sfnref``
    built from the website name instead.
    """
    url = (
        'http://www.aljazeera.com/news/2017/05/'
        'uae-enoc-pays-iran-4-billion-oil-dues-170529171315570.html'
    )
    expected = (
        "* {{cite web "
        "| title=UAE's Enoc pays Iran $4 billion in oil dues "
        "| website=Al Jazeera "
        "| date=2017-05-29 "
        "| url=http://www.aljazeera.com/news/2017/05/uae-enoc-pays-iran-4-"
        "billion-oil-dues-170529171315570.html "
        "| ref={{sfnref | Al Jazeera | 2017}} "
        "| access-date="
    )
    assert expected in urls_scr(url)[1]
def test_bbc3(self):
    """Same article as test_bbc2, fetched over https.

    The original note says the https page differs a lot from the http
    one; the expected citation is nevertheless the same.
    """
    expected = (
        '* {{cite web '
        '| last=Gage '
        '| first=Suzi '
        '| title=Sea otter return boosts ailing seagrass in California '
        '| website=BBC News '
        '| date=2013-08-26 '
        '| url=http://www.bbc.com/news/science-environment-23814524 '
        '| access-date='
    )
    result = urls_scr(
        'https://www.bbc.com/news/science-environment-23814524'
    )
    self.assertIn(expected, result[1])
def test_bbc6():
    """bbc.com science story with one author."""
    citation = urls_scr(
        'http://www.bbc.com/news/science-environment-26267918'
    )[1]
    assert (
        "* {{cite web "
        "| last=Amos "
        "| first=Jonathan "
        "| title=European Space Agency picks Plato planet-hunting mission "
        "| website=BBC News "
        "| date=2014-02-20 "
        "| url=http://www.bbc.com/news/science-environment-26267918 "
        "| access-date="
    ) in citation
def test_language_not_de_csbc():
    """CNBC article: the citation must contain no ``language`` field.

    The comparison is exact, so any extra field (such as a language
    code) between ``url`` and ``access-date`` would make it fail.
    """
    url = (
        'https://www.cnbc.com/2018/06/28/'
        'families-earning-117000-qualify-as-low-income-in-san-francisco.html'
    )
    expected = (
        "{{cite web "
        "| last=Martin "
        "| first=Emmie "
        "| title=In San Francisco, households earning $117,000 qualify as "
        "‘low income’ "
        "| website=CNBC "
        "| date=2018-06-28 "
        "| url=https://www.cnbc.com/2018/06/28/"
        "families-earning-117000-qualify-as-low-income-in-san-francisco.html "
        "| access-date="
    )
    citation = urls_scr(url)[1]
    # Drop the first two characters and the last twelve before comparing;
    # presumably these are the leading '* ' bullet and the variable
    # access-date value plus closing braces -- TODO confirm.
    assert expected == citation[2:-12]
def test_oth5():
    """magiran.com page where getting the date is tricky."""
    result = urls_scr('http://www.magiran.com/npview.asp?ID=1410487')
    expected_sfn = '{{sfn | نوري | 2007}}'
    expected_citation = (
        '* {{cite web '
        '| last=نوري '
        '| first=آزاده شهمير '
        '| title=روزنامه سرمايه86/3/1: دكتر طاهر صباحي، محقق و مجموعه دار'
        ' فرش: بازار جهاني با توليد فرش هنري نصيب ايران مي شود '
        '| website=magiran.com '
        '| date=2007-05-22 '
        '| url=http://www.magiran.com/npview.asp?ID=1410487 '
        '| language=fa '
        '| access-date='
    )
    assert expected_sfn in result[0]
    assert expected_citation in result[1]
def test_oth6():
    """Detection of the website name on a Fars News page."""
    result = urls_scr(
        'http://www.farsnews.com/newstext.php?nn=13930418000036'
    )
    expected_sfn = "{{sfn | ''خبرگزاری فارس'' | 2014}}"
    assert expected_sfn in result[0]
    # Original remark: Fars news is using 'خبرگزاری فارس' as og:author,
    # which is wrong, and that is why its name is not italicized in sfn.
    # NOTE(review): the expected sfn above does contain italic markup,
    # so this remark may be outdated -- confirm.
    expected_citation = (
        '* {{cite web '
        '| title=آیت\u200cالله محمدی گیلانی دارفانی را وداع گفت '
        '| website=خبرگزاری فارس '
        '| date=2014-07-09 '
        '| url=http://www.farsnews.com/newstext.php?nn=13930418000036 '
        '| language=fa '
        '| ref={{sfnref | خبرگزاری فارس | 2014}} '
        '| access-date='
    )
    assert expected_citation in result[1]
def test_invalid_name():
    """The irinn.ir URL must not fail with InvalidNameError."""
    url = (
        'http://www.irinn.ir/fa/news/499654/'
        '%D8%A7%D9%86%D8%AA%D8%AE%D8%A7%D8%A8%D8%A7%D8%AA-96-'
        '%D8%A8%D9%87-%D8%B1%D9%88%D8%A7%DB%8C%D8%AA-'
        '%D8%A2%D9%85%D8%A7%D8%B1'
    )
    expected = (
        '* {{cite web | title=انتخابات 96 به روایت آمار '
        '| website=پایگاه اطلاع رسانی شبکه خبر صدا'
        ' و سیمای جمهوری اسلامی ایران |'
        ' date=2017-05-24 | url=http://www.irinn.ir/fa/news/499654 '
        '| language=fa | ref={{sfnref |'
        ' پایگاه اطلاع رسانی شبکه خبر'
        ' صدا و سیمای جمهوری اسلامی ایران | 2017}} |'
        ' access-date='
    )
    citation = urls_scr(url)[1]
    assert expected in citation
def test_oth14(self):
    """The Independent article without an author.

    The expected citation uses ``sfnref`` with the website name in
    place of author fields.
    """
    url = (
        'http://www.independent.co.uk/news/business/'
        'the-investment-column-tt-group-1103208.html'
    )
    expected = (
        '* {{cite web '
        '| title=The Investment column: TT Group '
        '| website=The Independent '
        '| date=1999-06-29 '
        '| url=http://www.independent.co.uk/news/business/'
        'the-investment-column-tt-group-1103208.html '
        '| ref={{sfnref | The Independent | 1999}} '
        '| access-date='
    )
    self.assertIn(expected, urls_scr(url)[1])
def test_oth12(self):
    """thebulletin.org article with one author."""
    # NOTE(review): another test in this file is also named test_oth12;
    # if both live in the same class, the later definition wins.
    result = urls_scr(
        'http://thebulletin.org/evidence-shows-iron-dome-not-working7318'
    )
    expected = (
        '* {{cite web '
        '| last=Postol '
        '| first=Theodore A. '
        '| title=The evidence that shows Iron Dome is not working '
        '| website=Bulletin of the Atomic Scientists '
        '| date=2014-07-19 '
        '| url=http://thebulletin.org/'
        'evidence-shows-iron-dome-not-working7318 '
        '| access-date='
    )
    self.assertIn(expected, result[1])
def test_tgd1(self):
    """ABC News blog post.

    The wrong author `| last=News | first=ABC` must not appear; the
    expected author is Brian Ross.
    """
    url = 'http://abcnews.go.com/blogs/headlines/2006/12/saddam_executed/'
    citation = urls_scr(url)[1]
    expected = (
        '* {{cite web '
        '| last=Ross '
        '| first=Brian '
        '| title=Saddam Executed; An Era Comes to an End '
        '| website=ABC News Blogs '
        '| date=2006-12-30 '
        '| url=http://abcnews.go.com/blogs/headlines/2006/12/'
        'saddam_executed/ '
        '| access-date='
    )
    self.assertIn(expected, citation)
def test_tgd3(self):
    """TG Daily article with "Staff" in the author name.

    The expected citation carries no author fields and falls back to
    an ``sfnref`` built from the website name.
    """
    url = (
        'http://www.tgdaily.com/space-features/'
        '82906-sma-reveals-giant-star-cluster-in-the-making'
    )
    expected = (
        '* {{cite web '
        '| title=SMA reveals giant star cluster in the making '
        '| website=TG Daily '
        '| date=2013-12-17 '
        '| url=http://www.tgdaily.com/space-features/'
        '82906-sma-reveals-giant-star-cluster-in-the-making '
        '| ref={{sfnref | TG Daily | 2013}} '
        '| access-date='
    )
    self.assertIn(expected, urls_scr(url)[1])
def test_tgd2(self):
    """TG Daily article where author and date are hard to find."""
    result = urls_scr(
        'http://www.tgdaily.com/web/'
        '100381-apple-might-buy-beats-for-32-billion'
    )
    expected = (
        '* {{cite web '
        '| title=Apple might buy Beats for $3.2 billion '
        '| website=TG Daily '
        '| date=2014-05-09 '
        '| url=http://www.tgdaily.com/web/'
        '100381-apple-might-buy-beats-for-32-billion '
        '| ref={{sfnref | TG Daily | 2014}} '
        '| access-date='
    )
    self.assertIn(expected, result[1])
def test_nyt5(self):
    """NYT/IHT page whose date is not in the usual meta tags."""
    url = (
        'https://www.nytimes.com/2007/06/13/world/americas/'
        '13iht-whale.1.6123654.html'
    )
    expected = (
        '* {{cite web '
        '| title=19th-century harpoon gives clue on whales '
        '| website=The New York Times '
        '| date=2007-06-13 '
        '| url=https://www.nytimes.com/2007/06/13/world/americas/'
        '13iht-whale.1.6123654.html '
        '| ref={{sfnref | The New York Times | 2007}} '
        '| access-date='
    )
    citation = urls_scr(url)[1]
    self.assertIn(expected, citation)
def test_nyt3(self):
    """Old-style NYT page with one author.

    The page is requested over http; the expected url field is https.
    """
    result = urls_scr(
        'http://www.nytimes.com/2007/12/25/world/africa/25kenya.html'
    )
    expected = (
        '* {{cite web '
        '| last=Gettleman '
        '| first=Jeffrey '
        '| title=Election Rules Complicate Kenya Race '
        '| website=The New York Times '
        '| date=2007-12-25 '
        '| url=https://www.nytimes.com/2007/12/25/world/africa/'
        '25kenya.html '
        '| access-date='
    )
    self.assertIn(expected, result[1])
def test_oth11(self):
    """Business News Daily article with one author.

    The request URL carries a ``cmpid`` query string that is absent
    from the expected url field.
    """
    url = (
        'http://www.businessnewsdaily.com/6762-male-female-entrepreneurs'
        '.html?cmpid=514642_20140715_27858876'
    )
    expected = (
        '* {{cite web '
        '| last=Helmrich '
        '| first=Brittney '
        '| title=Male vs. Female Entrepreneurs: How Are They Different? '
        '| website=Business News Daily '
        '| date=2014-07-10 '
        '| url=http://www.businessnewsdaily.com/6762-male-female-'
        'entrepreneurs.html '
        '| access-date='
    )
    self.assertIn(expected, urls_scr(url)[1])