Exemplo n.º 1
0
def test_transform_ipynb_uri():
    """Run ``transform_ipynb_uri`` over a table of inputs and compare results.

    Each entry pairs a user-supplied URI (or gist id) with the path the
    transformation is expected to return; the first mismatch fails the test.
    """
    # GIST_RGX
    gist_id_cases = [
        ('1234', u'/1234'),
        ('1234/', u'/1234'),
    ]
    # GIST_URL_RGX
    gist_url_cases = [
        ('https://gist.github.com/username/1234', u'/1234'),
        ('https://gist.github.com/username/1234/', u'/1234'),
    ]
    # GITHUB_URL_RGX
    github_url_cases = [
        ('https://github.com/user/repo/blob/master/path/file.ipynb',
         u'/github/user/repo/blob/master/path/file.ipynb'),
        ('http://github.com/user/repo/blob/master/path/file.ipynb',
         u'/github/user/repo/blob/master/path/file.ipynb'),
    ]
    # URL
    plain_url_cases = [
        ('https://example.org/ipynb', u'/urls/example.org/ipynb'),
        ('http://example.org/ipynb', u'/url/example.org/ipynb'),
        ('example.org/ipynb', u'/url/example.org/ipynb'),
        (u'example.org/ipynb', u'/url/example.org/ipynb'),
        ('https://gist.github.com/user/1234/raw/a1b2c3/file.ipynb',
         u'/urls/gist.github.com/user/1234/raw/a1b2c3/file.ipynb'),
    ]

    all_cases = (
        gist_id_cases + gist_url_cases + github_url_cases + plain_url_cases
    )
    for uri, wanted in all_cases:
        actual = transform_ipynb_uri(uri)
        assert actual == wanted
Exemplo n.º 2
0
def test_transform_ipynb_uri():
    """Table-driven check of ``utils.transform_ipynb_uri`` with the default rewrites.

    The rewrite list is built once from ``default_rewrites`` and every input
    URI is transformed with it; the result must equal the expected path.
    """
    # GIST_RGX
    gist_ids = [
        ("1234", u"/1234"),
        ("1234/", u"/1234"),
    ]
    # GIST_URL_RGX
    gist_urls = [
        ("https://gist.github.com/user-name/1234", u"/1234"),
        ("https://gist.github.com/user-name/1234/", u"/1234"),
    ]
    # GITHUB_URL_RGX
    github_urls = [
        ("https://github.com/user-name_/repo-name_/blob/master/path/file.ipynb",
         u"/github/user-name_/repo-name_/blob/master/path/file.ipynb"),
        ("http://github.com/user-name_/repo-name_/blob/master/path/file.ipynb",
         u"/github/user-name_/repo-name_/blob/master/path/file.ipynb"),
        ("https://github.com/user-name_/repo-name_/tree/master/path/",
         u"/github/user-name_/repo-name_/tree/master/path/"),
    ]
    # GITHUB_USER_RGX, then GITHUB_USER_REPO_RGX
    github_shorthands = [
        ("ipy-thon", u"/github/ipy-thon/"),
        ("ipy-thon/ipy-thon", u"/github/ipy-thon/ipy-thon/tree/master/"),
    ]
    # DropBox Urls
    dropbox_urls = [
        (u"http://www.dropbox.com/s/bar/baz.qux",
         u"/url/dl.dropbox.com/s/bar/baz.qux"),
        (u"https://www.dropbox.com/s/zip/baz.qux",
         u"/urls/dl.dropbox.com/s/zip/baz.qux"),
        (u"https://www.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb?dl=1",
         u"/urls/dl.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb"),
    ]
    # URL
    plain_urls = [
        ("https://example.org/ipynb", u"/urls/example.org/ipynb"),
        ("http://example.org/ipynb", u"/url/example.org/ipynb"),
        ("example.org/ipynb", u"/url/example.org/ipynb"),
        (u"example.org/ipynb", u"/url/example.org/ipynb"),
        ("https://gist.github.com/user/1234/raw/a1b2c3/file.ipynb",
         u"/urls/gist.github.com/user/1234/raw/a1b2c3/file.ipynb"),
        ("https://gist.github.com/user/1234/raw/a1b2c3/file.ipynb?query=string&is=1",
         u"/urls/gist.github.com/user/1234/raw/a1b2c3/file.ipynb/%3Fquery%3Dstring%26is%3D1"),
    ]

    cases = (
        gist_ids
        + gist_urls
        + github_urls
        + github_shorthands
        + dropbox_urls
        + plain_urls
    )
    rewrites = provider_uri_rewrites(default_rewrites)
    for uri, wanted in cases:
        actual = utils.transform_ipynb_uri(uri, rewrites)
        failure_note = "%s => %s != %s" % (uri, actual, wanted)
        nt.assert_equal(actual, wanted, failure_note)
Exemplo n.º 3
0
def test_transform_ipynb_uri():
    """Verify ``utils.transform_ipynb_uri`` maps each sample input to its path.

    Inputs are transformed using the rewrite list derived from
    ``default_rewrites``; a mismatch reports input, actual and expected value.
    """
    cases = [
        # GIST_RGX
        ('1234', u'/1234'),
        ('1234/', u'/1234'),
        # GIST_URL_RGX
        ('https://gist.github.com/user-name/1234', u'/1234'),
        ('https://gist.github.com/user-name/1234/', u'/1234'),
        # GITHUB_URL_RGX
        (
            'https://github.com/user-name_/repo-name_/blob/master/path/file.ipynb',
            u'/github/user-name_/repo-name_/blob/master/path/file.ipynb',
        ),
        (
            'http://github.com/user-name_/repo-name_/blob/master/path/file.ipynb',
            u'/github/user-name_/repo-name_/blob/master/path/file.ipynb',
        ),
        (
            'https://github.com/user-name_/repo-name_/tree/master/path/',
            u'/github/user-name_/repo-name_/tree/master/path/',
        ),
        # GITHUB_USER_RGX
        ('ipy-thon', u'/github/ipy-thon/'),
        # GITHUB_USER_REPO_RGX
        ('ipy-thon/ipy-thon', u'/github/ipy-thon/ipy-thon/tree/master/'),
        # DropBox Urls
        (
            u'http://www.dropbox.com/s/bar/baz.qux',
            u'/url/dl.dropbox.com/s/bar/baz.qux',
        ),
        (
            u'https://www.dropbox.com/s/zip/baz.qux',
            u'/urls/dl.dropbox.com/s/zip/baz.qux',
        ),
        (
            u'https://www.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb?dl=1',
            u'/urls/dl.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb',
        ),
        # URL
        ('https://example.org/ipynb', u'/urls/example.org/ipynb'),
        ('http://example.org/ipynb', u'/url/example.org/ipynb'),
        ('example.org/ipynb', u'/url/example.org/ipynb'),
        (u'example.org/ipynb', u'/url/example.org/ipynb'),
        (
            'https://gist.github.com/user/1234/raw/a1b2c3/file.ipynb',
            u'/urls/gist.github.com/user/1234/raw/a1b2c3/file.ipynb',
        ),
        (
            'https://gist.github.com/user/1234/raw/a1b2c3/file.ipynb?query=string&is=1',
            u'/urls/gist.github.com/user/1234/raw/a1b2c3/file.ipynb/%3Fquery%3Dstring%26is%3D1',
        ),
    ]
    rewrite_rules = provider_uri_rewrites(default_rewrites)
    for source_uri, expected_path in cases:
        result = utils.transform_ipynb_uri(source_uri, rewrite_rules)
        report = "%s => %s != %s" % (source_uri, result, expected_path)
        nt.assert_equal(result, expected_path, report)
Exemplo n.º 4
0
def test_transform_ipynb_uri():
    """Check ``utils.transform_ipynb_uri`` on gist, GitHub, Dropbox and plain URLs.

    Every (input, expected) pair is evaluated in order; on mismatch the
    failure message shows the input, the actual and the expected value.
    """
    # GIST_RGX and GIST_URL_RGX
    gist_cases = [
        ('1234', u'/1234'),
        ('1234/', u'/1234'),
        ('https://gist.github.com/user-name/1234', u'/1234'),
        ('https://gist.github.com/user-name/1234/', u'/1234'),
    ]
    # GITHUB_URL_RGX, GITHUB_USER_RGX and GITHUB_USER_REPO_RGX
    github_cases = [
        ('https://github.com/user-name_/repo-name_/blob/master/path/file.ipynb',
         u'/github/user-name_/repo-name_/blob/master/path/file.ipynb'),
        ('http://github.com/user-name_/repo-name_/blob/master/path/file.ipynb',
         u'/github/user-name_/repo-name_/blob/master/path/file.ipynb'),
        ('https://github.com/user-name_/repo-name_/tree/master/path/',
         u'/github/user-name_/repo-name_/tree/master/path/'),
        ('ipy-thon', u'/github/ipy-thon/'),
        ('ipy-thon/ipy-thon', u'/github/ipy-thon/ipy-thon/tree/master/'),
    ]
    # DropBox Urls
    dropbox_cases = [
        (u'http://www.dropbox.com/s/bar/baz.qux',
         u'/url/dl.dropbox.com/s/bar/baz.qux'),
        (u'https://www.dropbox.com/s/zip/baz.qux',
         u'/urls/dl.dropbox.com/s/zip/baz.qux'),
        (u'https://www.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb',
         u'/urls/dl.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb'),
    ]
    # URL
    generic_cases = [
        ('https://example.org/ipynb', u'/urls/example.org/ipynb'),
        ('http://example.org/ipynb', u'/url/example.org/ipynb'),
        ('example.org/ipynb', u'/url/example.org/ipynb'),
        (u'example.org/ipynb', u'/url/example.org/ipynb'),
        ('https://gist.github.com/user/1234/raw/a1b2c3/file.ipynb',
         u'/urls/gist.github.com/user/1234/raw/a1b2c3/file.ipynb'),
    ]

    for uri, wanted in gist_cases + github_cases + dropbox_cases + generic_cases:
        got = utils.transform_ipynb_uri(uri)
        detail = "%s => %s != %s" % (uri, got, wanted)
        nt.assert_equal(got, wanted, detail)
Exemplo n.º 5
0
def test_transform_ipynb_uri():
    """Exercise ``utils.transform_ipynb_uri`` with the default provider rewrites.

    The expectations below are (input URI, expected path) pairs; each input is
    transformed with the list returned by ``provider_uri_rewrites`` and
    compared for equality.
    """
    expectations = [
        # GIST_RGX
        ("1234", u"/1234"),
        ("1234/", u"/1234"),
        # GIST_URL_RGX
        ("https://gist.github.com/user-name/1234", u"/1234"),
        ("https://gist.github.com/user-name/1234/", u"/1234"),
        # GITHUB_URL_RGX
        ("https://github.com/user-name_/repo-name_/blob/master/path/file.ipynb",
         u"/github/user-name_/repo-name_/blob/master/path/file.ipynb"),
        ("http://github.com/user-name_/repo-name_/blob/master/path/file.ipynb",
         u"/github/user-name_/repo-name_/blob/master/path/file.ipynb"),
        ("https://github.com/user-name_/repo-name_/tree/master/path/",
         u"/github/user-name_/repo-name_/tree/master/path/"),
        # GITHUB_USER_RGX
        ("ipy-thon", u"/github/ipy-thon/"),
        # GITHUB_USER_REPO_RGX
        ("ipy-thon/ipy-thon", u"/github/ipy-thon/ipy-thon/tree/master/"),
        # DropBox Urls
        (u"http://www.dropbox.com/s/bar/baz.qux",
         u"/url/dl.dropbox.com/s/bar/baz.qux"),
        (u"https://www.dropbox.com/s/zip/baz.qux",
         u"/urls/dl.dropbox.com/s/zip/baz.qux"),
        (u"https://www.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb?dl=1",
         u"/urls/dl.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb"),
        # URL
        ("https://example.org/ipynb", u"/urls/example.org/ipynb"),
        ("http://example.org/ipynb", u"/url/example.org/ipynb"),
        ("example.org/ipynb", u"/url/example.org/ipynb"),
        (u"example.org/ipynb", u"/url/example.org/ipynb"),
        ("https://gist.github.com/user/1234/raw/a1b2c3/file.ipynb",
         u"/urls/gist.github.com/user/1234/raw/a1b2c3/file.ipynb"),
    ]
    rewrites = provider_uri_rewrites(default_rewrites)
    for given, expected in expectations:
        produced = utils.transform_ipynb_uri(given, rewrites)
        message = "%s => %s != %s" % (given, produced, expected)
        nt.assert_equal(produced, expected, message)
Exemplo n.º 6
0
def test_transform_ipynb_uri():
    """Check ``utils.transform_ipynb_uri`` against a table of sample inputs.

    Each entry pairs an input URI (or gist id / GitHub shorthand) with the
    path the transformation must return. On mismatch the failure message
    names the offending input together with the actual and expected values.
    """
    test_data = (
        # GIST_RGX
        ('1234', u'/1234'),
        ('1234/', u'/1234'),
        # GIST_URL_RGX
        ('https://gist.github.com/username/1234', u'/1234'),
        ('https://gist.github.com/username/1234/', u'/1234'),
        # GITHUB_URL_RGX
        ('https://github.com/user/repo/blob/master/path/file.ipynb',
         u'/github/user/repo/blob/master/path/file.ipynb'),
        ('http://github.com/user/repo/blob/master/path/file.ipynb',
         u'/github/user/repo/blob/master/path/file.ipynb'),
        # GITHUB_USER_RGX
        ('ipython', u'/github/ipython/'),
        # GITHUB_USERREPO_RGX
        ('ipython/ipython', u'/github/ipython/ipython/tree/master/'),
        # DropBox Urls
        (u'http://www.dropbox.com/s/bar/baz.qux',
         u'/url/dl.dropbox.com/s/bar/baz.qux'),
        (u'https://www.dropbox.com/s/zip/baz.qux',
         u'/urls/dl.dropbox.com/s/zip/baz.qux'),
        (u'https://www.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb',
         u'/urls/dl.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb'),
        # URL
        ('https://example.org/ipynb', u'/urls/example.org/ipynb'),
        ('http://example.org/ipynb', u'/url/example.org/ipynb'),
        ('example.org/ipynb', u'/url/example.org/ipynb'),
        (u'example.org/ipynb', u'/url/example.org/ipynb'),
        ('https://gist.github.com/user/1234/raw/a1b2c3/file.ipynb',
         u'/urls/gist.github.com/user/1234/raw/a1b2c3/file.ipynb'),
    )
    for ipynb_uri, expected_output in test_data:
        output = utils.transform_ipynb_uri(ipynb_uri)
        # Include the input in the message: a bare "x != y" does not reveal
        # which table row failed when several rows share an expected value.
        nt.assert_equal(output, expected_output, "%s => %s != %s" % (
            ipynb_uri, output, expected_output
        ))
Exemplo n.º 7
0
def test_transform_ipynb_uri():
    """Check ``utils.transform_ipynb_uri`` against a table of sample inputs.

    Each entry pairs an input URI (or gist id) with the path the
    transformation must return. On mismatch the failure message names the
    offending input together with the actual and expected values.
    """
    test_data = (
        # GIST_RGX
        ('1234', u'/1234'),
        ('1234/', u'/1234'),
        # GIST_URL_RGX
        ('https://gist.github.com/username/1234', u'/1234'),
        ('https://gist.github.com/username/1234/', u'/1234'),
        # GITHUB_URL_RGX
        ('https://github.com/user/repo/blob/master/path/file.ipynb',
         u'/github/user/repo/blob/master/path/file.ipynb'),
        ('http://github.com/user/repo/blob/master/path/file.ipynb',
         u'/github/user/repo/blob/master/path/file.ipynb'),
        # DropBox Urls
        (u'http://www.dropbox.com/u/bar/baz.qux',
         u'/url/dl.dropbox.com/u/bar/baz.qux'),
        (u'https://www.dropbox.com/u/zip/baz.qux',
         u'/urls/dl.dropbox.com/u/zip/baz.qux'),
        # URL
        ('https://example.org/ipynb', u'/urls/example.org/ipynb'),
        ('http://example.org/ipynb', u'/url/example.org/ipynb'),
        ('example.org/ipynb', u'/url/example.org/ipynb'),
        (u'example.org/ipynb', u'/url/example.org/ipynb'),
        ('https://gist.github.com/user/1234/raw/a1b2c3/file.ipynb',
         u'/urls/gist.github.com/user/1234/raw/a1b2c3/file.ipynb'),
    )
    for ipynb_uri, expected_output in test_data:
        output = utils.transform_ipynb_uri(ipynb_uri)
        # Include the input in the message: a bare "x != y" does not reveal
        # which table row failed when several rows share an expected value.
        nt.assert_equal(output, expected_output, "%s => %s != %s" % (
            ipynb_uri, output, expected_output
        ))
Exemplo n.º 8
0
def test_transform_ipynb_uri():
    """Assert that ``utils.transform_ipynb_uri`` yields the expected path per input.

    Inputs and expected outputs are kept in two parallel-ordered groups per
    category and walked pairwise; the first mismatch fails the test.
    """
    checks = []
    # GIST_RGX
    checks.append(('1234', u'/1234'))
    checks.append(('1234/', u'/1234'))
    # GIST_URL_RGX
    checks.append(('https://gist.github.com/username/1234', u'/1234'))
    checks.append(('https://gist.github.com/username/1234/', u'/1234'))
    # GITHUB_URL_RGX
    checks.append((
        'https://github.com/user/repo/blob/master/path/file.ipynb',
        u'/github/user/repo/blob/master/path/file.ipynb',
    ))
    checks.append((
        'http://github.com/user/repo/blob/master/path/file.ipynb',
        u'/github/user/repo/blob/master/path/file.ipynb',
    ))
    # URL
    checks.append(('https://example.org/ipynb', u'/urls/example.org/ipynb'))
    checks.append(('http://example.org/ipynb', u'/url/example.org/ipynb'))
    checks.append(('example.org/ipynb', u'/url/example.org/ipynb'))
    checks.append((u'example.org/ipynb', u'/url/example.org/ipynb'))
    checks.append((
        'https://gist.github.com/user/1234/raw/a1b2c3/file.ipynb',
        u'/urls/gist.github.com/user/1234/raw/a1b2c3/file.ipynb',
    ))

    for source, wanted in checks:
        transformed = utils.transform_ipynb_uri(source)
        assert transformed == wanted
Exemplo n.º 9
0
def _truncate_words(text, max_words):
    """Return *text* limited to its first *max_words* space-separated words.

    When *text* holds more than *max_words* words, the kept words are joined
    with single spaces and ' ...' is appended; otherwise *text* is returned
    unchanged.
    """
    words = text.split(' ')
    if len(words) > max_words:
        return ' '.join(words[:max_words]) + ' ...'
    return text


def insert_notebook(url, screenshot=True, nb=None):
    """Download a notebook page, extract its metadata and save a ``Notebook``.

    Parameters
    ----------
    url : str
        Address of the notebook, or of a page already rendered by nbviewer.
    screenshot : bool
        When true, ``make_screenshots`` is called and the thumbnail it
        reports is stored on the record.
    nb : Notebook or None
        Existing record to update. When ``None`` the record is fetched or
        created with ``Notebook.objects.get_or_create(url=url)``.

    Returns
    -------
    dict
        ``{'status': 'success', 'pk': ..., 'created': ...}`` after saving;
        ``{'status': 'failure', 'reason': ...}`` when the download fails;
        or, when the screenshot step reports failure, the dict returned by
        ``make_screenshots``.

    Raises
    ------
    AssertionError
        If the title, description, HTML URL or URL reaches its length limit.
    """
    # TODO: do ajax-based async
    from web.models import Notebook

    # sanitize url: downgrade the scheme only. A blanket
    # ``url.replace('https', 'http')`` would also corrupt any later
    # occurrence of "https" in the host, path or query string.
    if url.startswith('https:'):
        url = 'http:' + url[len('https:'):]

    is_nbviewer = False
    try:
        url = unshorten_url(url)
        r = requests.get(url)
        # ``.get`` avoids an uncaught KeyError when the server sends no
        # Content-Type header.
        if 'text/html' in r.headers.get('content-type', ''):
            # check that it's a notebook
            # NOTE(review): the membership test assumes ``urlopen`` returns
            # the page text rather than a response object -- confirm.
            tmp_html = urlopen(url)
            is_nbviewer = ("Notebook on nbviewer" in tmp_html)
        if is_nbviewer:
            html_url = url
        else:
            html_url = urlparse.urljoin('http://nbviewer.ipython.org',
                                        transform_ipynb_uri(url))
        print('Downloading %s' % html_url)
        html = urlopen(html_url)
    except (urllib2.HTTPError, urllib2.URLError, socket.timeout, ssl.SSLError,
            requests.exceptions.SSLError,
            requests.sessions.InvalidSchema) as e:
        if nb is not None:
            # NOTE(review): the counter is not saved here -- presumably the
            # caller persists ``nb``; verify.
            nb.failures_access += 1
        print('Failed in downloading', e)
        return {'status': 'failure', 'reason': 'Failed accessing the notebook'}

    extracted = extraction.Extractor().extract(html, source_url=html_url)
    # The second extracted title is preferred, then the second description.
    # Fall back to a placeholder instead of raising IndexError when neither
    # list has a second entry.
    if len(extracted.titles) > 1:
        title = extracted.titles[1]
    elif len(extracted.descriptions) > 1:
        title = extracted.descriptions[1]
    else:
        title = 'No title'
    title = _truncate_words(title, 20)
    if len(extracted.descriptions) > 1:
        description = extracted.descriptions[1]
    else:
        description = ''
    description = _truncate_words(description, 40)

    # some more sanitation
    if title.startswith('This web site does not host'):
        # this is the nbviewer default title
        title = 'No title'
    title = title.strip(u'¶')

    #similar = Notebook.objects.filter(title=title, description=description)
    #if len(Notebook.objects.filter(title=title, description=description)) > 0:
    #return {'status': 'failure', 'reason': 'duplicate document', 'pk': similar[0].pk}

    if nb is None:
        obj, created = Notebook.objects.get_or_create(url=url)
    else:
        obj = nb
        created = False
    # screenshot
    if screenshot:
        out = make_screenshots(html_url, obj.pk)
        if out['status'] == 'failure':
            if created:
                # Do not keep a record that was created only for this attempt.
                obj.delete()
            else:
                # NOTE(review): not saved here -- verify the caller persists it.
                obj.failures_access += 1
            return out
        else:
            obj.thumb_img = out['thumb']

    # Length guards; messages added so a failure names the offending field.
    assert len(title) < 500, \
        'title too long: %d characters' % len(title)
    obj.title = title
    assert len(description) < 2000, \
        'description too long: %d characters' % len(description)
    obj.description = description
    assert len(html_url) < 1000, \
        'html_url too long: %d characters' % len(html_url)
    obj.html_url = html_url
    assert len(url) < 1000, \
        'url too long: %d characters' % len(url)
    obj.url = url
    obj.full_html = html

    obj.last_accessed_date = datetime.now().date()
    obj.save()
    return {'status': 'success', 'pk': obj.pk, 'created': created}