Пример #1
0
 def test_links(self):
     # inline link with a title: the title text is expected in @n
     titled_tei = markdown_tei.convert(
         'This is [an example](http://example.com/ "Title") inline link.')
     self.assertEqual(
         '<p>This is <ref target="http://example.com/" n="Title">'
         'an example</ref> inline link.</p>',
         titled_tei)

     # inline link without a title: only @target is expected
     untitled_tei = markdown_tei.convert(
         '[This link](http://example.net/) has no title attribute.')
     self.assertEqual(
         '<p><ref target="http://example.net/">This link</ref>'
         ' has no title attribute.</p>',
         untitled_tei)
Пример #2
0
    def test_emphasis(self):
        # (expected TEI, markdown input) pairs: bold first, then italic;
        # each emphasis style is expected as an <emph> with its own @rend
        cases = [
            ('<p>a <emph rend="bold">bold</emph> statement</p>',
             'a **bold** statement'),
            ('<p>an <emph rend="italic">emphatic</emph> statement</p>',
             'an *emphatic* statement'),
        ]
        for expected_tei, markdown_text in cases:
            self.assertEqual(expected_tei, markdown_tei.convert(markdown_text))
Пример #3
0
    def test_inline_html(self):
        # inline html <i> embedded in markdown is expected as an italic <emph>
        italic_html = 'Something about <i>Murder in the Cathedral</i> by Eliot'
        expected_tei = ('<p>Something about <emph rend="italic">Murder in '
                        'the Cathedral</emph> by Eliot</p>')
        self.assertEqual(expected_tei, markdown_tei.convert(italic_html))

        # a self-closed <i/> is expected to yield an empty emph element
        empty_tei = markdown_tei.convert('empty inline <i/>')
        self.assertEqual('<p>empty inline <emph rend="italic"></emph></p>',
                         empty_tei)
Пример #4
0
    def test_paragraphs(self):
        first_para = 'Single paragraph'
        second_para = 'Second paragraph'

        # a single run of text is expected inside one <p>
        single_tei = markdown_tei.convert(first_para)
        self.assertEqual('<p>%s</p>' % first_para, single_tei)

        # text separated by a blank line is expected as two adjacent <p>
        both = (first_para, second_para)
        double_tei = markdown_tei.convert('%s\n\n%s' % both)
        self.assertEqual('<p>%s</p><p>%s</p>' % both, double_tei)
Пример #5
0
    def test_code(self):
        # inline code span (backticks) stays inside the paragraph
        inline_tei = markdown_tei.convert('Here is some `code` inline.')
        self.assertEqual('<p>Here is some <code>code</code> inline.</p>',
                         inline_tei)

        # fenced code block: the fence language is expected in @lang and
        # the code text is expected unchanged
        ruby_source = '\n'.join([
            "require 'redcarpet'",
            'markdown = Redcarpet.new("Hello World!")',
            'puts markdown.to_html',
        ])
        fenced_markdown = '```ruby\n%s\n```' % ruby_source
        self.assertEqual('<code lang="ruby">%s</code>' % ruby_source,
                         markdown_tei.convert(fenced_markdown))
Пример #6
0
    def test_images(self):
        # A markdown image is expected to produce a TEI <media> element
        # whose <desc> carries the alt text and, when present, the title.
        # Substring checks are used rather than an exact match, so the
        # order of attributes in the output is not pinned down here.
        imglink = '![Alt text](/path/to/img.png)'
        tei_imglink = markdown_tei.convert(imglink)
        # assertIn replaces the deprecated assert_ alias (removed in
        # Python 3.12) and reports both operands when the check fails
        self.assertIn('<media', tei_imglink)
        # NOTE(review): the attribute is lower-case `mimetype` here, but the
        # audio/video tests expect `mimeType` -- confirm which is intended
        self.assertIn(' mimetype="image/png"', tei_imglink)
        self.assertIn(' url="/path/to/img.png"', tei_imglink)
        self.assertIn('<desc><p>Alt text</p></desc>', tei_imglink)

        # image with an optional title: the title is expected as a <head>
        # before the alt-text paragraph inside <desc>
        imglink_title = '![Alt text](/path/to/img.jpg "Optional title")'
        tei_imglink_title = markdown_tei.convert(imglink_title)
        self.assertIn('<desc><head>Optional title</head><p>Alt text</p></desc>',
                      tei_imglink_title)
Пример #7
0
    def test_headers(self):
        # (expected TEI, markdown input) pairs: ATX headers of depth 1, 2
        # and 6 are expected as <head> with a matching levelN @type; the
        # last pair is a horizontal rule, expected as a milestone
        cases = (
            ('<head type="level1">This is an H1</head>', '# This is an H1'),
            ('<head type="level2">This is an H2</head>', '## This is an H2'),
            ('<head type="level6">This is an H6</head>',
             '###### This is an H6'),
            ('<milestone rend="horizontal-rule"/>', '* * *'),
        )
        for expected_tei, markdown_text in cases:
            self.assertEqual(expected_tei, markdown_tei.convert(markdown_text))
Пример #8
0
    def test_lists(self):
        # unordered list: a plain <list> with one <item> per bullet
        bulleted = '\n'.join(['* Red', '* Green', '* Blue'])
        self.assertEqual(
            '<list><item>Red</item><item>Green</item><item>Blue</item></list>',
            markdown_tei.convert(bulleted))

        # ordered list: same structure, marked with rend="numbered"
        numbered = '\n'.join(['1. Red', '2. Green', '3. Blue'])
        self.assertEqual(
            '<list rend="numbered"><item>Red</item><item>Green</item>'
            '<item>Blue</item></list>',
            markdown_tei.convert(numbered))
Пример #9
0
    def test_footnotes(self):
        # footnote
        footnote = '''Footnotes[^1] have a label and content.

[^1]: This is some footnote content.'''
        tei_footnote = markdown_tei.convert(footnote)
        self.assert_('<p>Footnotes<ref target="#fn1" type="noteAnchor">1</ref> have' in tei_footnote)
        self.assert_('<note xml:id="fn1" type="footnote"><p>This is some footnote content.</p></note>'
            in tei_footnote)
Пример #10
0
    def test_blockquote(self):
        # A two-paragraph markdown blockquote is expected as a single
        # <quote> wrapping two <p> elements.  Only the start, the paragraph
        # boundary and the end of the output are checked.
        blockquote = '\n'.join([
            '> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet',
            '> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.',
            '> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.',
            '> ',
            '> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse',
            '> id sem consectetuer libero luctus adipiscing.'])
        tei_blockquote = markdown_tei.convert(blockquote)
        # assertTrue / assertIn replace the deprecated assert_ alias
        # (removed in Python 3.12)
        self.assertTrue(
            tei_blockquote.startswith('<quote><p>This is a blockquote'))
        self.assertIn('risus.</p><p>Donec', tei_blockquote)
        self.assertTrue(tei_blockquote.endswith('adipiscing.</p></quote>'))
Пример #11
0
    def test_tables(self):
        # table
        table = '''
Firstly  | Secondly
-------  | --------
A.1  | A.2
B.1  | B.2
'''
        tei_table = markdown_tei.convert(table)
        self.assert_('<table><head><row><cell role="label">Firstly</cell>' in
            tei_table)
        self.assert_('<row><cell role="data">B.1</cell><cell role="data">B.2</cell></row>'
            in tei_table)
Пример #12
0
    def test_video(self):
        # using html5 video embedded in markdown
        mimetype = 'video/mp4'
        url = 'http://some.video/file.mp4'
        video = '''<video controls='controls'>
  <source src='%s' type='%s'/>
</video>''' % (url, mimetype)

        self.assertEqual('<media mimeType="%s" url="%s"/>' % (mimetype, url),
            markdown_tei.convert(video))

        # source attribute tag order shouldn't matter
        video = '''<video controls='controls'>
          <source type='%s' src='%s'/>
        </video>''' % (mimetype, url)
        expected = '<media mimeType="%s" url="%s"/>' % (mimetype, url)
        self.assertEqual(expected, markdown_tei.convert(video))

        video_plus = '''<script>console.log("test");</script>

%s

testing  ... again ...''' % video
        self.assert_(expected in markdown_tei.convert(video_plus))

        # inline video block
        video_attrs = {
            'url': 'http://www.w3schools.com/html/mov_bbb.mp4',
            'mimetype': 'video/mp4'
        }
        inline_video = '''applause
text inline with audio<video controls="controls">
<source src="%(url)s" type="%(mimetype)s"/>
</video>will cause the TEI to break''' % video_attrs
        inline_tei_video = markdown_tei.convert(inline_video)
        self.assert_('<media mimeType="%(mimetype)s" url="%(url)s"/>' %
                     video_attrs in inline_tei_video)

        # no type attribute - mimetype inferred from video src url
        mimetype = 'video/mp4'
        url = 'http://some.video/file.mp4'
        video = '''<video controls='controls'>
          <source src='%s'/>
        </video>''' % (url, )
        expected = '<media mimeType="%s" url="%s"/>' % (mimetype, url)
        self.assertEqual(expected, markdown_tei.convert(video))

        mimetype = 'video/ogg'
        url = 'http://some.video/file.ogg'
        video = '''<video controls='controls'>
          <source src='%s'/>
        </video>''' % (url, )
        expected = '<media mimeType="%s" url="%s"/>' % (mimetype, url)
        self.assertEqual(expected, markdown_tei.convert(video))

        # fallback mimetype where extension is not informative
        mimetype = 'video/mp4'
        url = 'http://some.video/file/without/ext/'
        video = '''<video controls='controls'>
          <source src='%s'/>
        </video>''' % (url, )
        expected = '<media mimeType="%s" url="%s"/>' % (mimetype, url)
        self.assertEqual(expected, markdown_tei.convert(video))
Пример #13
0
    def test_audio(self):
        # using html5 audio embedded in markdown
        mimetype = 'audio/mpeg'
        url = 'http://some.audio/file.mp3'
        audio = '''<audio controls='controls'>
  <source src='%s' type='%s'/>
</audio>''' % (url, mimetype)

        self.assertEqual('<media mimeType="%s" url="%s"/>' % (mimetype, url),
            markdown_tei.convert(audio))

        # source attribute tag order shouldn't matter
        audio = '''<audio controls='controls'>
          <source type='%s' src='%s'/>
        </audio>''' % (mimetype, url)
        expected = '<media mimeType="%s" url="%s"/>' % (mimetype, url)
        self.assertEqual(expected, markdown_tei.convert(audio))

        audio_plus = '''<script>console.log("test");</script>

%s

testing  ... again ...''' % audio
        self.assert_(expected in markdown_tei.convert(audio_plus))

        # inline audio block
        audio_attrs = {
            'url': 'http://soundbible.com/mp3/Audience_Applause-Matthiew11-1206899159.mp3',
            'mimetype': 'audio/mpeg'
        }
        inline_audio = '''applause
text inline with audio<audio controls="controls">
<source src="%(url)s" type="%(mimetype)s"/>
</audio>will cause the TEI to break''' % audio_attrs
        inline_tei_audio = markdown_tei.convert(inline_audio)
        self.assert_('<media mimeType="%(mimetype)s" url="%(url)s"/>' % audio_attrs
             in inline_tei_audio)

        # no type attribute - mimetype inferred from audio src url
        mimetype = 'audio/mpeg'
        url = 'http://some.audio/file.mp3'
        audio = '''<audio controls='controls'>
          <source src='%s'/>
        </audio>''' % (url, )
        expected = '<media mimeType="%s" url="%s"/>' % (mimetype, url)
        self.assertEqual(expected, markdown_tei.convert(audio))

        mimetype = 'audio/aac'
        url = 'http://some.audio/file.aac'
        audio = '''<audio controls='controls'>
          <source src='%s'/>
        </audio>''' % (url, )
        expected = '<media mimeType="%s" url="%s"/>' % (mimetype, url)
        self.assertEqual(expected, markdown_tei.convert(audio))

        # fallback mimetype where extension is not informative
        mimetype = 'audio/mpeg'
        url = 'http://some.audio/file/without/ext/'
        audio = '''<audio controls='controls'>
          <source src='%s'/>
        </audio>''' % (url, )
        expected = '<media mimeType="%s" url="%s"/>' % (mimetype, url)
        self.assertEqual(expected, markdown_tei.convert(audio))
Пример #14
0
def annotation_to_tei(annotation, teivol):
    '''Generate a tei note from an annotation.  Sets annotation id,
    slugified tags as ana attribute, username as resp attribute, and
    annotation content is converted from markdown to TEI.  Related pages
    and any TEI-encoded citations stored on the annotation are added to
    the note as well.

    :param annotation: :class:`~readux.annotations.models.Annotation`
    :param teivol: :class:`~readux.books.tei.AnnotatedFacsimile` tei
        document, for converting related page ARK uris into TEI ids
    :returns: :class:`readux.books.tei.Note`
    '''
    # NOTE: annotation created/edited dates are not included here
    # because they were determined not to be relevant for our purposes

    # sample note provided by Alice
    # <note resp="JPK" xml:id="oshnp50n1" n="1"><p>This is an example note.</p></note>

    # convert markdown-formatted text content to tei
    note_content = markdown_tei.convert(annotation.text)
    # markdown results could be a list of paragraphs, and not a proper
    # xml tree; also, the generated tags do not include a namespace, so
    # wrap in a note element and set the default namespace as tei
    teinote = load_xmlobject_from_string(
        '<note xmlns="%s">%s</note>' % (teimap.TEI_NAMESPACE, note_content),
        tei.Note)

    # what id do we want? annotation uuid? url?
    teinote.id = 'annotation-%s' % annotation.id  # can't start with numeric
    teinote.href = absolutize_url(annotation.get_absolute_url())
    teinote.type = 'annotation'

    # if an annotation includes tags, reference them by slugified id in @ana
    # (info() is called once and the result reused)
    info = annotation.info()
    if 'tags' in info and info['tags']:
        # set() drops duplicate slugs; sorting makes the attribute value
        # reproducible, since set iteration order is arbitrary
        teinote.ana = ' '.join(
            sorted(set('#%s' % slugify(t.strip()) for t in info['tags'])))

    # if the annotation has an associated user, mark the author
    # as responsible for the note
    if annotation.user:
        teinote.resp = annotation.user.username

    # include full markdown of the annotation, as a backup for losing
    # content converting from markdown to tei, and for easy display
    teinote.markdown = annotation.text

    # if annotation contains related pages, generate a link group
    if annotation.related_pages:
        for rel_page in annotation.related_pages:
            page_ref = tei.Ref(text=rel_page, type='related page')
            # find tei page identifier from the page ark; when no match
            # is found the reference is still added, without a target
            target = teivol.page_id_by_xlink(rel_page)
            if target is not None:
                page_ref.target = '#%s' % target
            teinote.related_pages.append(page_ref)

    # if annotation includes citations, add them to the tei
    # NOTE: expects these citations to be TEI encoded already (generated
    # by the zotero api and added via meltdown-zotero annotator plugin)
    if annotation.extra_data.get('citations', None):
        for bibl in annotation.extra_data['citations']:
            # zotero tei export currently includes an id that is not
            # a valid ncname (contains : and /)
            bibsoup = BeautifulSoup(bibl, 'xml')
            # convert xml id into the format we want:
            # zotero-#### (zotero item id)
            for bibl_struct in bibsoup.find_all('biblStruct'):
                zotero_id = bibl_struct.get('xml:id')
                # leave a biblStruct without an id untouched instead of
                # failing the whole conversion with a KeyError
                if zotero_id:
                    bibl_struct['xml:id'] = 'zotero-%s' % \
                        zotero_id.split('/')[-1]

            # only the first biblStruct in each citation is loaded
            teibibl = load_xmlobject_from_string(bibsoup.biblStruct.prettify(),
                                                 tei.BiblStruct)
            teinote.citations.append(teibibl)

    return teinote
Пример #15
0
def annotation_to_tei(annotation, teivol):
    '''Generate a tei note from an annotation.  Sets annotation id,
    slugified tags as ana attribute, username as resp attribute, and
    annotation content is converted from markdown to TEI.  Related pages
    and any TEI-encoded citations stored on the annotation are added to
    the note as well.

    :param annotation: :class:`~readux.annotations.models.Annotation`
    :param teivol: :class:`~readux.books.tei.AnnotatedFacsimile` tei
        document, for converting related page ARK uris into TEI ids
    :returns: :class:`readux.books.tei.Note`
    '''
    # NOTE: annotation created/edited dates are not included here
    # because they were determined not to be relevant for our purposes

    # sample note provided by Alice
    # <note resp="JPK" xml:id="oshnp50n1" n="1"><p>This is an example note.</p></note>

    # convert markdown-formatted text content to tei
    note_content = markdown_tei.convert(annotation.text)
    # markdown results could be a list of paragraphs, and not a proper
    # xml tree; also, the generated tags do not include a namespace, so
    # wrap in a note element and set the default namespace as tei
    teinote = load_xmlobject_from_string(
        '<note xmlns="%s">%s</note>' % (teimap.TEI_NAMESPACE, note_content),
        tei.Note)

    # what id do we want? annotation uuid? url?
    teinote.id = 'annotation-%s' % annotation.id  # can't start with numeric
    teinote.href = absolutize_url(annotation.get_absolute_url())
    teinote.type = 'annotation'

    # if an annotation includes tags, reference them by slugified id in @ana
    # (info() is called once and the result reused)
    info = annotation.info()
    if 'tags' in info and info['tags']:
        # set() drops duplicate slugs; sorting makes the attribute value
        # reproducible, since set iteration order is arbitrary
        teinote.ana = ' '.join(
            sorted(set('#%s' % slugify(t.strip()) for t in info['tags'])))

    # if the annotation has an associated user, mark the author
    # as responsible for the note
    if annotation.user:
        teinote.resp = annotation.user.username

    # include full markdown of the annotation, as a backup for losing
    # content converting from markdown to tei, and for easy display
    teinote.markdown = annotation.text

    # if annotation contains related pages, generate a link group
    if annotation.related_pages:
        for rel_page in annotation.related_pages:
            page_ref = tei.Ref(text=rel_page, type='related page')
            # find tei page identifier from the page ark; when no match
            # is found the reference is still added, without a target
            target = teivol.page_id_by_xlink(rel_page)
            if target is not None:
                page_ref.target = '#%s' % target
            teinote.related_pages.append(page_ref)

    # if annotation includes citations, add them to the tei
    # NOTE: expects these citations to be TEI encoded already (generated
    # by the zotero api and added via meltdown-zotero annotator plugin)
    if annotation.extra_data.get('citations', None):
        for bibl in annotation.extra_data['citations']:
            # zotero tei export currently includes an id that is not
            # a valid ncname (contains : and /)
            bibsoup = BeautifulSoup(bibl, 'xml')
            # convert xml id into the format we want:
            # zotero-#### (zotero item id)
            for bibl_struct in bibsoup.find_all('biblStruct'):
                zotero_id = bibl_struct.get('xml:id')
                # leave a biblStruct without an id untouched instead of
                # failing the whole conversion with a KeyError
                if zotero_id:
                    bibl_struct['xml:id'] = 'zotero-%s' % \
                        zotero_id.split('/')[-1]

            # only the first biblStruct in each citation is loaded
            teibibl = load_xmlobject_from_string(bibsoup.biblStruct.prettify(),
                                                 tei.BiblStruct)
            teinote.citations.append(teibibl)

    return teinote