Exemplo n.º 1
0
 def test_oth3(self):
     """Ars Technica article: all four authors must appear in the citation."""
     url = (
         'https://arstechnica.com/science/2007/09/'
         'the-pseudoscience-behind-homeopathy/'
     )
     expected = (
         '* {{cite web '
         '| last=Timmer '
         '| first=John '
         '| last2=Ford '
         '| first2=Matt '
         '| last3=Lee '
         '| first3=Chris '
         '| last4=Gitlin '
         '| first4=Jonathan '
         '| title=Diluting the scientific method:  Ars looks at homeopathy '
         '| website=Ars Technica '
         '| date=2007-09-12 '
         '| url=https://arstechnica.com/science/2007/09/'
         'the-pseudoscience-behind-homeopathy/ '
         '| access-date='
     )
     # Index 1 of the result is the element checked for the full citation.
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 2
0
 def test_dm1(self):
     """Daily Mail article with four authors: check the sfn and citation."""
     url = (
         'http://www.dailymail.co.uk/news/article-2633025/'
         'London-cleric-convicted-NYC-terrorism-trial.html'
     )
     expected_sfn = '{{sfn | Malm | Witheridge | Drury | Bates | 2014}}'
     expected_citation = (
         '* {{cite web '
         '| last=Malm '
         '| first=Sara '
         '| last2=Witheridge '
         '| first2=Annette '
         '| last3=Drury '
         '| first3=Ian '
         '| last4=Bates '
         '| first4=Daniel '
         '| title=Abu Hamza found guilty in US court of helping'
         ' Al-Qaeda terrorists '
         '| website=Daily Mail Online '
         '| date=2014-05-19 '
         '| url=http://www.dailymail.co.uk/news/article-2633025/'
         'London-cleric-convicted-NYC-terrorism-trial.html '
         '| access-date='
     )
     result = urls_scr(url)
     # The short footnote must match exactly; the citation only by prefix.
     self.assertEqual(expected_sfn, result[0])
     self.assertIn(expected_citation, result[1])
Exemplo n.º 3
0
 def test_nyt2(self):
     """New-style NYT page with two authors.

     The requested URL ends with ``?hp``; the expected citation URL does not.
     """
     url = (
         'https://www.nytimes.com/2014/05/31/sports/basketball/'
         'steven-a-ballmers-2-billion-play-for-clippers-is-a-big-bet-'
         'on-the-nba.html?hp'
     )
     expected = (
         '* {{cite web '
         '| last=Belson '
         '| first=Ken '
         '| last2=Sandomir '
         '| first2=Richard '
         '| title=$2 Billion for Clippers? In Time, '
         'It May Be a Steal for Steve Ballmer '
         '| website=The New York Times '
         '| date=2014-05-30 '
         '| url=https://www.nytimes.com/2014/05/31/sports/basketball/'
         'steven-a-ballmers-2-billion-play-for-clippers-is-a-big-bet-on-'
         'the-nba.html '
         '| access-date='
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 4
0
    def test_citation_author_reverse_order(self):
        """Check citation_author detection on an arXiv abstract page.

        The page lists first name and last name in reverse order; the
        expected citation still assigns them to ``first``/``last`` correctly.

        """
        url = (
            'https://arxiv.org/abs/1608.05006?utm_medium=email&utm_source='
            'other&utm_campaign=opencourse.GdeNrll1EeSROyIACtiVvg.'
            'announcements%257Eopencourse.GdeNrll1EeSROyIACtiVvg.'
            '4xDVKzx5EeeJjRJrkGD1dA'
        )
        expected = (
            '* {{cite web '
            '| last=Hartman '
            '| first=JudithAnn R. '
            '| last2=Nelson '
            '| first2=Eric A. '
            '| title=Automaticity in Computation and Student Success in '
            'Introductory Physical Science Courses '
            '| website=arXiv.org e-Print archive '
            '| date=2016-08-17 '
            '| url=https://arxiv.org/abs/1608.05006?utm_medium=email&'
            'utm_source=other&utm_campaign=opencourse.GdeNrll1EeSROyIACtiVvg.'
            'announcements%257Eopencourse.GdeNrll1EeSROyIACtiVvg.'
            '4xDVKzx5EeeJjRJrkGD1dA '
            '| access-date='
        )
        citation = urls_scr(url)[1]
        self.assertIn(expected, citation)
Exemplo n.º 5
0
 def test_oth12(self):
     """Times of India page: the sfn is expected to name Kashyap, 2001."""
     url = (
         'http://timesofindia.indiatimes.com/city/pune/'
         'UK-allows-working-visas-for-Indian-students/'
         'articleshow/1163528927.cms?'
     )
     # Only the short footnote (index 0 of the result) is checked here.
     short_footnote = urls_scr(url)[0]
     self.assertIn("{{sfn | Kashyap | 2001}}", short_footnote)
Exemplo n.º 6
0
def google_encrypted_scr(url, parsed_url, date_format):
    """Dispatch a Google URL to the books handler or the generic URL handler.

    The element at index 2 of ``parsed_url`` is inspected: when it is exactly
    ``/books`` or begins with ``/books/`` the call is forwarded to
    ``googlebooks_scr(parsed_url, date_format)``; anything else goes to
    ``urls_scr(url, date_format)``.

    Sample book URLs:
        https://encrypted.google.com/books?id=6upvonUt0O8C
        https://www.google.com/books?id=bwfoCAAAQBAJ&pg=PA32
        https://www.google.com/books/edition/_/bwfoCAAAQBAJ?gbpv=1&pg=PA32
    """
    # A 7-character prefix distinguishes '/books' and '/books/...' from
    # longer lookalikes such as '/bookshelf'.
    path_prefix = parsed_url[2][:7]
    if path_prefix not in {'/books', '/books/'}:
        return urls_scr(url, date_format)
    return googlebooks_scr(parsed_url, date_format)
Exemplo n.º 7
0
def test_single_line_meta_tags():
    """Regression test for issue #9, using an edition.cnn.com article."""
    url = (
        'https://edition.cnn.com/'
        '2014/02/09/us/north-carolina-coal-ash-spill/'
    )
    expected = (
        "* {{cite web | last=Shoichet | first=Catherine E. "
        "| title=Spill spews tons of coal ash into North Carolina's "
        "Dan River | website=CNN | date=2014-02-09 "
        "| url=http://www.cnn.com/2014/02/09/us/north-carolina-coal-ash"
        "-spill/index.html | access-date="
    )
    citation = urls_scr(url)[1]
    assert expected in citation
Exemplo n.º 8
0
 def test_indaily(self):
     """InDaily article: single author, title, site name and date."""
     url = 'https://indaily.com.au/news/2020/03/19/epidemics-expert-contradicts-marshalls-schools-advice/'
     expected = (
         "* {{cite web | last=Siebert | first=Bension "
         "| title=Epidemics expert questions Marshall's schools advice "
         "| website=InDaily | date=2020-03-19 "
         "| url=https://indaily.com.au/news/2020/03/19/epidemics-expert-contradicts-marshalls-schools-advice/ "
         "| access-date="
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 9
0
def test_reverse_name():
    """The page gives the author as `Martin, Tracy`; Tracy is the first name."""
    url = 'http://www.highbeam.com/doc/1P3-3372742961.html'
    expected = (
        '* {{cite web '
        '| last=Martin '
        '| first=Tracy '
        '| title=Dynamometers Explained '
        '| website=HighBeam Research '
        '| date=2014-07-01 '
        '| url=http://www.highbeam.com/doc/1P3-3372742961.html '
        '| access-date='
    )
    citation = urls_scr(url)[1]
    assert expected in citation
Exemplo n.º 10
0
def test_bbc5():
    """news.bbc.co.uk business article with a single author."""
    url = 'http://news.bbc.co.uk/2/hi/business/2570109.stm'
    expected = (
        "* {{cite web "
        "| last=Madslien "
        "| first=Jorn "
        "| title=Inside the Bentley factory "
        "| website=BBC NEWS "
        "| date=2002-12-24 "
        "| url=http://news.bbc.co.uk/2/hi/business/2570109.stm "
        "| access-date="
    )
    citation = urls_scr(url)[1]
    assert expected in citation
Exemplo n.º 11
0
def test_abc_author():
    """abc.net.au article: the author must be Kathleen Ferguson."""
    url = (
        'https://www.abc.net.au/news/2020-09-06/'
        'glow-worms-in-wollemi-national-park-survived-summer-bushfire/'
        '12634762'
    )
    expected = (
        '* {{cite web | last=Ferguson | first=Kathleen '
        '| title=Glow worms in Wollemi National Park survived Gospers '
        'Mountain bushfire - ABC News '
        '| website=ABC (Australian Broadcasting Corporation) '
        '| date=2020-09-06 | url=https://www.abc.net.au/news/2020-09-06/'
        'glow-worms-in-wollemi-national-park-survived-summer-bushfire/'
        '12634762 | access-date='
    )
    citation = urls_scr(url)[1]
    assert expected in citation
Exemplo n.º 12
0
def test_bbc2():
    """bbc.com science article (http URL) with a single author."""
    url = 'http://www.bbc.com/news/science-environment-23814524'
    expected = (
        '* {{cite web '
        '| last=Gage '
        '| first=Suzi '
        '| title=Sea otter return boosts ailing seagrass in California '
        '| website=BBC News '
        '| date=2013-08-26 '
        '| url=http://www.bbc.com/news/science-environment-23814524 '
        '| access-date='
    )
    citation = urls_scr(url)[1]
    assert expected in citation
Exemplo n.º 13
0
 def test_bbc1(self):
     """BBC article without authors: the citation carries a ref instead.

     The request uses https while the expected citation URL is http.
     """
     url = 'https://www.bbc.com/news/world-asia-27653361'
     expected = (
         "* {{cite web "
         "| title=US 'received Qatar assurances' on Afghan prisoner deal "
         "| website=BBC News "
         "| date=2014-06-01 "
         "| url=http://www.bbc.com/news/world-asia-27653361 "
         "| ref={{sfnref | BBC News | 2014}} "
         "| access-date="
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 14
0
def test_oth7():
    """MIT News page with a By Topic line and a byline containing ' | '."""
    url = (
        'http://news.mit.edu/2014/'
        'traffic-lights-theres-a-better-way-0707'
    )
    expected = (
        '* {{cite web '
        '| last=Chandler '
        '| first=David L. '
        '| title=Traffic lights: There’s a better way '
        '| website=MIT News '
        '| date=2014-07-07 '
        '| url=http://news.mit.edu/2014/'
        'traffic-lights-theres-a-better-way-0707 '
        '| access-date='
    )
    citation = urls_scr(url)[1]
    assert expected in citation
Exemplo n.º 15
0
def test_bbc4():
    """news.bbc.co.uk Newsnight article with a single author."""
    url = 'http://news.bbc.co.uk/2/hi/programmes/newsnight/5178122.stm'
    expected = (
        "* {{cite web "
        "| last=Jones "
        "| first=Meirion "
        "| title=Malaria advice 'risks lives' "
        "| website=BBC NEWS "
        "| date=2006-07-13 "
        "| url="
        "http://news.bbc.co.uk/2/hi/programmes/newsnight/5178122.stm "
        "| access-date="
    )
    citation = urls_scr(url)[1]
    assert expected in citation
Exemplo n.º 16
0
def test_empty_meta_author_content():
    """An empty author meta tag must not produce a malformed citation.

    The expected output has no author fields and carries a ref instead.
    """
    url = (
        'http://www.aljazeera.com/news/2017/05/'
        'uae-enoc-pays-iran-4-billion-oil-dues-170529171315570.html'
    )
    expected = (
        "* {{cite web "
        "| title=UAE's Enoc pays Iran $4 billion in oil dues "
        "| website=Al Jazeera "
        "| date=2017-05-29 "
        "| url=http://www.aljazeera.com/news/2017/05/uae-enoc-pays-iran-4-"
        "billion-oil-dues-170529171315570.html "
        "| ref={{sfnref | Al Jazeera | 2017}} "
        "| access-date="
    )
    citation = urls_scr(url)[1]
    assert expected in citation
Exemplo n.º 17
0
 def test_bbc3(self):
     """https variant of the bbc2 article (the served page differs a lot).

     The expected citation still uses the http form of the URL.
     """
     url = 'https://www.bbc.com/news/science-environment-23814524'
     expected = (
         '* {{cite web '
         '| last=Gage '
         '| first=Suzi '
         '| title=Sea otter return boosts ailing seagrass in California '
         '| website=BBC News '
         '| date=2013-08-26 '
         '| url=http://www.bbc.com/news/science-environment-23814524 '
         '| access-date='
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 18
0
def test_bbc6():
    """bbc.com science article with a single author (Jonathan Amos)."""
    url = 'http://www.bbc.com/news/science-environment-26267918'
    expected = (
        "* {{cite web "
        "| last=Amos "
        "| first=Jonathan "
        "| title=European Space Agency picks Plato planet-hunting mission "
        "| website=BBC News "
        "| date=2014-02-20 "
        "| url=http://www.bbc.com/news/science-environment-26267918 "
        "| access-date="
    )
    citation = urls_scr(url)[1]
    assert expected in citation
Exemplo n.º 19
0
def test_language_not_de_csbc():
    """CNBC citation must match exactly and contain no language field.

    The comparison is an equality check on the citation with its first two
    and last twelve characters sliced off.
    """
    url = 'https://www.cnbc.com/2018/06/28/families-earning-117000-qualify-as-low-income-in-san-francisco.html'
    expected = (
        "{{cite web "
        "| last=Martin "
        "| first=Emmie "
        "| title=In San Francisco, households earning $117,000 qualify as ‘low income’ "
        "| website=CNBC "
        "| date=2018-06-28 "
        "| url=https://www.cnbc.com/2018/06/28/families-earning-117000-qualify-as-low-income-in-san-francisco.html "
        "| access-date="
    )
    citation = urls_scr(url)[1]
    assert expected == citation[2:-12]
Exemplo n.º 20
0
def test_oth5():
    """magiran.com page where extracting the date is tricky."""
    expected_sfn = '{{sfn | نوري | 2007}}'
    expected_citation = (
        '* {{cite web '
        '| last=نوري '
        '| first=آزاده شهمير '
        '| title=روزنامه سرمايه86/3/1: دكتر طاهر صباحي، محقق و مجموعه دار'
        ' فرش: بازار جهاني با توليد فرش هنري نصيب ايران مي شود '
        '| website=magiran.com '
        '| date=2007-05-22 '
        '| url=http://www.magiran.com/npview.asp?ID=1410487 '
        '| language=fa '
        '| access-date='
    )
    result = urls_scr('http://www.magiran.com/npview.asp?ID=1410487')
    assert expected_sfn in result[0]
    assert expected_citation in result[1]
Exemplo n.º 21
0
def test_oth6():
    """Website-name detection on a farsnews.com article."""
    # Fars news sets og:author to 'خبرگزاری فارس', which is wrong.
    # NOTE(review): the previous comment claimed the name is therefore not
    # italicized in the sfn, yet the expected sfn below wraps it in '' --
    # confirm which behavior is intended.
    expected_sfn = "{{sfn | ''خبرگزاری فارس'' | 2014}}"
    expected_citation = (
        '* {{cite web '
        '| title=آیت\u200cالله محمدی گیلانی دارفانی را وداع گفت '
        '| website=خبرگزاری فارس '
        '| date=2014-07-09 '
        '| url=http://www.farsnews.com/newstext.php?nn=13930418000036 '
        '| language=fa '
        '| ref={{sfnref | خبرگزاری فارس | 2014}} '
        '| access-date='
    )
    result = urls_scr('http://www.farsnews.com/newstext.php?nn=13930418000036')
    assert expected_sfn in result[0]
    assert expected_citation in result[1]
Exemplo n.º 22
0
def test_invalid_name():
    """The irinn.ir URL must be processed without an InvalidNameError."""
    url = (
        'http://www.irinn.ir/fa/news/499654/'
        '%D8%A7%D9%86%D8%AA%D8%AE%D8%A7%D8%A8%D8%A7%D8%AA-96-'
        '%D8%A8%D9%87-%D8%B1%D9%88%D8%A7%DB%8C%D8%AA-'
        '%D8%A2%D9%85%D8%A7%D8%B1'
    )
    expected = (
        '* {{cite web | title=انتخابات 96 به روایت آمار '
        '| website=پایگاه اطلاع رسانی شبکه خبر صدا'
        ' و سیمای جمهوری اسلامی ایران |'
        ' date=2017-05-24 | url=http://www.irinn.ir/fa/news/499654 '
        '| language=fa | ref={{sfnref |'
        ' پایگاه اطلاع رسانی شبکه خبر'
        ' صدا و سیمای جمهوری اسلامی ایران | 2017}} |'
        ' access-date='
    )
    citation = urls_scr(url)[1]
    assert expected in citation
Exemplo n.º 23
0
 def test_oth14(self):
     """independent.co.uk article without authors; a ref is expected."""
     url = (
         'http://www.independent.co.uk/news/business/'
         'the-investment-column-tt-group-1103208.html'
     )
     expected = (
         '* {{cite web '
         '| title=The Investment column: TT Group '
         '| website=The Independent '
         '| date=1999-06-29 '
         '| url=http://www.independent.co.uk/news/business/'
         'the-investment-column-tt-group-1103208.html '
         '| ref={{sfnref | The Independent | 1999}} '
         '| access-date='
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 24
0
 def test_oth12(self):
     """thebulletin.org article with a single author (Theodore A. Postol)."""
     url = 'http://thebulletin.org/evidence-shows-iron-dome-not-working7318'
     expected = (
         '* {{cite web '
         '| last=Postol '
         '| first=Theodore A. '
         '| title=The evidence that shows Iron Dome is not working '
         '| website=Bulletin of the Atomic Scientists '
         '| date=2014-07-19 '
         '| url=http://thebulletin.org/'
         'evidence-shows-iron-dome-not-working7318 '
         '| access-date='
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 25
0
 def test_tgd1(self):
     """ABC News blog post: author is Brian Ross.

     Guards against the wrong result ``| last=News | first=ABC``.
     """
     url = 'http://abcnews.go.com/blogs/headlines/2006/12/saddam_executed/'
     expected = (
         '* {{cite web '
         '| last=Ross '
         '| first=Brian '
         '| title=Saddam Executed; An Era Comes to an End '
         '| website=ABC News Blogs '
         '| date=2006-12-30 '
         '| url=http://abcnews.go.com/blogs/headlines/2006/12/'
         'saddam_executed/ '
         '| access-date='
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 26
0
 def test_tgd3(self):
     """TG Daily page whose author name contains "Staff".

     The expected citation has no author fields and carries a ref instead.
     """
     url = (
         'http://www.tgdaily.com/space-features/'
         '82906-sma-reveals-giant-star-cluster-in-the-making'
     )
     expected = (
         '* {{cite web '
         '| title=SMA reveals giant star cluster in the making '
         '| website=TG Daily '
         '| date=2013-12-17 '
         '| url=http://www.tgdaily.com/space-features/'
         '82906-sma-reveals-giant-star-cluster-in-the-making '
         '| ref={{sfnref | TG Daily | 2013}} '
         '| access-date='
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 27
0
 def test_tgd2(self):
     """TG Daily page where author and date are hard to find.

     The expected citation has a date but no author, so a ref is included.
     """
     url = (
         'http://www.tgdaily.com/web/'
         '100381-apple-might-buy-beats-for-32-billion'
     )
     expected = (
         '* {{cite web '
         '| title=Apple might buy Beats for $3.2 billion '
         '| website=TG Daily '
         '| date=2014-05-09 '
         '| url=http://www.tgdaily.com/web/'
         '100381-apple-might-buy-beats-for-32-billion '
         '| ref={{sfnref | TG Daily | 2014}} '
         '| access-date='
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 28
0
 def test_nyt5(self):
     """NYT/IHT page whose date is not in the usual meta tags."""
     url = (
         'https://www.nytimes.com/2007/06/13/world/americas/'
         '13iht-whale.1.6123654.html'
     )
     expected = (
         '* {{cite web '
         '| title=19th-century harpoon gives clue on whales '
         '| website=The New York Times '
         '| date=2007-06-13 '
         '| url=https://www.nytimes.com/2007/06/13/world/americas/'
         '13iht-whale.1.6123654.html '
         '| ref={{sfnref | The New York Times | 2007}} '
         '| access-date='
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 29
0
 def test_nyt3(self):
     """Old-style NYT page with a single author.

     The request uses http while the expected citation URL is https.
     """
     url = 'http://www.nytimes.com/2007/12/25/world/africa/25kenya.html'
     expected = (
         '* {{cite web '
         '| last=Gettleman '
         '| first=Jeffrey '
         '| title=Election Rules Complicate Kenya Race '
         '| website=The New York Times '
         '| date=2007-12-25 '
         '| url=https://www.nytimes.com/2007/12/25/world/africa/'
         '25kenya.html '
         '| access-date='
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)
Exemplo n.º 30
0
 def test_oth11(self):
     """Business News Daily article.

     The request carries a ``cmpid`` query string that is absent from the
     expected citation URL.
     """
     url = (
         'http://www.businessnewsdaily.com/6762-male-female-entrepreneurs'
         '.html?cmpid=514642_20140715_27858876'
     )
     expected = (
         '* {{cite web '
         '| last=Helmrich '
         '| first=Brittney '
         '| title=Male vs. Female Entrepreneurs: How Are They Different? '
         '| website=Business News Daily '
         '| date=2014-07-10 '
         '| url=http://www.businessnewsdaily.com/6762-male-female-'
         'entrepreneurs.html '
         '| access-date='
     )
     citation = urls_scr(url)[1]
     self.assertIn(expected, citation)