Exemplo n.º 1
0
    def get_xhtml_from_xml(self, xml_string):
        # IV convert the xml tags and attribute to HTML-TEI

        # remove comments
        content = re.sub(ur'(?musi)<!--.*?-->', ur'', xml_string)
        #
        self.c = 0
        self.conversion_cache = {}

        def replace_tag(match):
            if match.group(0) in self.conversion_cache:
                return self.conversion_cache[match.group(0)]

            self.c += 1
            if self.c > 10e6:
                exit()

            ret = match.group(0)

            tag = match.group(2)

            # don't convert <p>
            if tag in ['p', 'span']:
                return ret

            # any closing tag is /span
            if '/' in match.group(1):
                return '</span>'

            if tag == 'pb':
                print self.c

            # tag -
            ret = ur'<span data-dpt="%s"' % tag
            # attribute - assumes " for attribute values
            attrs = (re.sub(ur'(?ui)(\w+)(\s*=\s*")', ur'data-dpt-\1\2', match.group(3))).strip()
            if attrs:
                ret += ' ' + attrs
            ret += match.group(4)

            # print '', ret

            self.conversion_cache[match.group(0)] = ret

            return ret

        from digipal.utils import re_sub_fct
        content = re_sub_fct(content, ur'(?musi)(<\s*/?\s*)(\w+)([^>]*?)(/?\s*>)', replace_tag)

        return content
Exemplo n.º 2
0
    def operation_foliate(self, options, content):
        '''
            <span data-dpt="margin">fol. 1. b</span>[...]

            =>

            <p><span data-dpt="location" data-dpt-loctype="locus">1v</span><p>
        '''
        self._next_locus = u'1r'

        def replace(match):
            ret = match.group(0)

            locus = match.group(1)

            parts = re.match(ur'(?musi)^\s?(\d+)\.?\s*(b?)\.?$', locus)
            if not parts:
                print 'WARNING: no match [%s]' % repr(locus)
            else:
                lo = parts.group(1)
                lon = lo
                if parts.group(2) == 'b':
                    lon = u'%sr' % (int(lo) + 1,)
                    lo += 'v'
                else:
                    lon = lo + 'v'
                    lo += 'r'

                print '%s ("%s")' % (lo, locus)

                if lo != self._next_locus:
                    print 'WARNING: locus out of sequence, expected %s, got %s' % (self._next_locus, lo)

                self._next_locus = lon

                ret = u'</p><p><span data-dpt="location" data-dpt-loctype="locus">%s</span></p><p>' % lo

            return ret

        content = re_sub_fct(content, ur'(?musi)<span data-dpt="margin">\s*fol.([^<]*)</span>', replace)

        return content
Exemplo n.º 3
0
    def operation_pb2locus(self, options, content):
        start_page = 1
        if options:
            start_page = int(options[0])

        self.rep_option = start_page
        def replace(match):
            # !!! ASSUME pb is not in <p> or anything else

            number = re.sub(ur'^.*"([^"]+)".*$', ur'\1', match.group(1))
            if len(number) == len(match.group(1)):
                number = self.rep_option

            ret = u'<p><span data-dpt="location" data-dpt-loctype="locus">%s</span></p>' % number

            self.rep_option = get_int(number, default=self.rep_option) + 1

            return ret

        content = re_sub_fct(content, ur'<span\s+data-dpt\s*=\s*"pb"([^>]*)>', replace)

        return content