示例#1
0
    def command_unit(self):
        # from digipal_text.models import TextUnit
        # rs = TextUnit.objects
        from digipal_text.models import TextContentXML
        from digipal_text.views.viewer import get_fragment_extent, get_all_units
        rid = self.get_arg(1)
        fitler = {}
        if rid:
            fitler = {'id': rid}
        ctx = TextContentXML.objects.filter(**fitler).first()

        cnt = 0

        if ctx:
            print ctx
            location_type = self.get_arg(2, 'locus')

            location = self.get_arg(3, None)
            units = get_all_units(ctx.content, location_type)
            
            for unit in units:
                if location is None or dputils.is_unit_in_range(unit['unitid'], location):
                    cnt += 1
                    print '%-10s %-5s %-10s' % (unit['unitid'], len(unit['content']), repr(unit['content'][:10]))
                    if location:
                        print repr(unit['content'])

            print '%s units' % cnt
示例#2
0
    def command_unit(self):
        # from digipal_text.models import TextUnit
        # rs = TextUnit.objects
        from digipal_text.models import TextContentXML
        from digipal_text.views.viewer import get_fragment_extent, get_all_units
        rid = self.get_arg(1)
        fitler = {}
        if rid:
            fitler = {'id': rid}
        ctx = TextContentXML.objects.filter(**fitler).first()

        cnt = 0

        if ctx:
            print ctx
            location_type = self.get_arg(2, 'locus')

            location = self.get_arg(3, None)
            units = get_all_units(ctx.content, location_type)

            for unit in units:
                if location is None or dputils.is_unit_in_range(
                        unit['unitid'], location):
                    cnt += 1
                    print '%-10s %-5s %-10s' % (unit['unitid'],
                                                len(unit['content']),
                                                repr(unit['content'][:10]))
                    if location:
                        print repr(unit['content'])

            print '%s units' % cnt
示例#3
0
    def command_search(self):
        if len(self.args) < 3:
            raise CommandError('Convert requires 2 arguments')

        from digipal.management.commands.utils import get_stats_from_xml_string
        from digipal_text.views.viewer import get_fragment_extent, get_all_units

        pattern = unicode(self.args[3])
        #pattern = ur'.{1,30}ħ.{1,30}'
        pattern = ur'(?musi)#MSTART#(.*?)#MEND#'

        stats = {}
        cnt = 0
        import regex as re
        all_entries = []
        for tcx in TextContentXML.objects.filter(
                text_content__item_part_id=self.args[1],
                text_content__type__slug=self.args[2]):
            if 1:
                for match in re.findall(pattern, tcx.content):
                    cnt += 1
                    if len(re.findall(ur'<p>', match)) > 1:
                        print '>1'
                    entries = re.findall(ur'"entry">(.*?)<', match)
                    if entries:
                        all_entries.extend(entries)
            if 0:
                units = get_all_units(tcx.content, 'entry')
                for unit in units:
                    for match in re.findall(pattern, unit['content']):
                        #print unit['unitid'], repr(match)
                        #print repr(match)
                        #print re.findall(ur'<p>', match)
                        cnt += 1
示例#4
0
    def command_download(self):
        ret = ur''

        recordid = self.args[1]
        unitid = ''
        if len(self.args) > 2: unitid = self.args[2]
        from digipal_text.models import TextContentXML
        from digipal_text.views.viewer import get_fragment_extent, get_all_units
        text_content_xml = TextContentXML.objects.get(id=recordid)
        content = text_content_xml.content

        suffix = ''
        if unitid:
            suffix = '-unit'
            units = get_all_units(content, 'entry')
            for unit in units:
                if unit['unitid'] == unitid:
                    ret = ur'<root>%s</root>' % unit['content']
        else:
            ret = content

        import regex

        if ret is None:
            ret = u''

        # print repr(ret)
        file_name = 'tcx%s%s.xml' % (text_content_xml.id, suffix)
        from digipal.utils import write_file
        write_file(file_name, ret)
        print 'Written file %s ' % file_name
示例#5
0
    def command_download(self):
        ret = ur''

        recordid = self.args[1]
        unitid = ''
        if len(self.args) > 2: unitid = self.args[2]
        from digipal_text.models import TextContentXML
        from digipal_text.views.viewer import get_fragment_extent, get_all_units
        text_content_xml = TextContentXML.objects.get(id=recordid)
        content = text_content_xml.content
        

        suffix = ''
        if unitid:
            suffix = '-unit'
            units = get_all_units(content, 'entry')
            for unit in units:
                if unit['unitid'] == unitid:
                    ret = ur'<root>%s</root>' % unit['content']
        else:
            ret = content

        import regex

        if ret is None:
            ret = u''

        # ret = regex.sub(ur'(?musi)<span data-dpt="abbr">.*?</span>(<span data-dpt="exp">)', ur'\1', ret)

        # ret = regex.sub(ur'(?musi)<span data-dpt="hi" data-dpt-rend="su[pb]">(.*?)</span>', ur'\1', ret)
        # ret = regex.sub(ur'(?musi)<i>(.*?)</i>', ur'\1', ret)

        # print repr(ret)

#         for it in regex.findall('<span data-dpt="hi" data-dpt-rend="su[pb]">.*?</span>', ret):
#             print repr(it)

        # for it in regex.findall(ur'(?musi)qu[i1][i1]', ret):
        #    print repr(it)
        if 0:
            ret = regex.sub(ur'(?musi)<span data-dpt="hi" data-dpt-rend="sup">([^<]+)</span>', ur'<sup>\1</sup>', ret)
            ret = regex.sub(ur'(?musi)<span data-dpt="hi" data-dpt-rend="sub">([^<]+)</span>', ur'<sub>\1</sub>', ret)
            ret = regex.sub(ur'(?musi)<span data-dpt="lb" data-dpt-src="ms"></span>', ur'<br/>', ret)
            ret = regex.sub(ur'(?musi)<span data-dpt="lb" data-dpt-src="prj"></span>', ur'<lb/>', ret)
            ret = regex.sub(ur'(?musi)<span data-dpt="abbr">(.*?)</span>', ur'<abbr>\1</abbr>', ret)
            ret = regex.sub(ur'(?musi)<span data-dpt="exp">(.*?)</span>', ur'<exp>\1</exp>', ret)

        # print repr(ret)
        file_name = 'tcx%s%s.xml' % (text_content_xml.id, suffix)
        from digipal.utils import write_file
        write_file(file_name, ret)
        print 'Written file %s ' % file_name
示例#6
0
    def command_search(self):
        if len(self.args) < 3:
            raise CommandError('Convert requires 2 arguments')

        from digipal.management.commands.utils import get_stats_from_xml_string
        from digipal_text.views.viewer import get_fragment_extent, get_all_units

        pattern = unicode(self.args[3])
        #pattern = ur'.{1,30}ħ.{1,30}'

        stats = {}
        cnt = 0
        import regex as re
        for tcx in TextContentXML.objects.filter(text_content__item_part_id=self.args[1], text_content__type__slug=self.args[2]):
            units = get_all_units(tcx.content, 'entry')
            for unit in units:
                for match in re.findall(pattern, unit['content']):
                    #print unit['unitid'], repr(match)
                    print repr(match)
                    cnt += 1
        
        print '%s occurences' % cnt