示例#1
0
def build_webassets(output_dir):
    """Render the sample Henry IV text as a tagged, paginated HTML page.

    Reads ``1_KING_HENRY_IV_rev.txt`` (path relative to the current working
    directory), tokenizes it with ``RegexTokenizer``, tags the tokens with
    ``DocuscopeTagger``, invokes the formatter's ``_build_webassets()`` step,
    and writes the output of ``format_paginated`` into ``output_dir``.

    Args:
        output_dir: Directory in which the generated HTML file is written.
    """
    source_name = '1_KING_HENRY_IV_rev.txt'
    target_name = 'Ubiqu+Ity_1_KING_HENRY_IV_Docuscope_Example_Output.html'

    # Only the read needs the input handle; everything else works on the text.
    with open(source_name, 'r') as source:
        raw_text = source.read()

    tokens = RegexTokenizer().tokenize(raw_text)
    tags = DocuscopeTagger(return_included_tags=True).tag(tokens)

    formatter = HTMLFormatter()
    formatter._build_webassets()
    html = formatter.format_paginated(
        tags=tags,
        tokens=tokens,
        text_name=source_name,
        text_relative_path="",
        processing_id="",
    )

    target_path = os.path.join(output_dir, target_name)
    with open(target_path, 'w') as target:
        target.write(html)
示例#2
0
 def setUp(self):
     """Create three sample tag entries and pass them through prepare_tags."""
     # Entries key1..key3, each carrying its number as 'name' and its own
     # key as 'full_name'.
     self.tags = {
         'key{}'.format(number): {
             'name': str(number),
             'full_name': 'key{}'.format(number),
         }
         for number in (1, 2, 3)
     }
     self.formatter = HTMLFormatter()
     self.prepared_tags = self.formatter.prepare_tags(self.tags)
示例#3
0
 def setUp(self):
     """Build the token/tag fixture for one line of text and prepare its tokens.

     Stores the sample sentence, its token list, the tagger-style tag
     records, the result of ``HTMLFormatter.prepare_tokens`` and the
     ``tag_map`` / ``pos_map`` lookup tables on the test instance.
     """
     self.formatter = HTMLFormatter()
     self.text = "So shaken as we are, so wan with care"

     # Each row expands to [[text], start, length, kind]. In this data the
     # last field is 0 for words, 1 for punctuation and 2 for whitespace
     # (presumably a token-type code; confirm against the tokenizer).
     token_rows = (
         ('So', 0, 2, 0), (' ', 2, 1, 2),
         ('shaken', 3, 6, 0), (' ', 9, 1, 2),
         ('as', 10, 2, 0), (' ', 12, 1, 2),
         ('we', 13, 2, 0), (' ', 15, 1, 2),
         ('are', 16, 3, 0), (',', 19, 1, 1), (' ', 20, 1, 2),
         ('so', 21, 2, 0), (' ', 23, 1, 2),
         ('wan', 24, 3, 0), (' ', 27, 1, 2),
         ('with', 28, 4, 0), (' ', 32, 1, 2),
         ('care', 33, 4, 0),
     )
     self.tokens = [
         [[text], start, length, kind]
         for text, start, length, kind in token_rows
     ]

     # One record per tagged span; key order is kept as in the original data.
     tag_records = [
         {
             'index_end': 2,
             'rules': [
                 ('DocuscopeTagger..default.(2, 3).True.EXCL_!UNTAGGED.EXCL_!NORULES.EXCL_!EXCLUDED.Transformation',
                  'shaken'),
             ],
             'token_end_len': 6,
             'len': 1,
             'num_included_tokens': 1,
             'index_start': 2,
             'pos_end': 3,
             'pos_start': 3,
         },
         {
             'index_end': 8,
             'rules': [
                 ('DocuscopeTagger..default.(2, 3).True.EXCL_!UNTAGGED.EXCL_!NORULES.EXCL_!EXCLUDED.ReportingStates',
                  ('we', 'are')),
             ],
             'token_end_len': 3,
             'len': 1,
             'num_included_tokens': 2,
             'index_start': 6,
             'pos_end': 16,
             'pos_start': 13,
         },
         {
             'index_end': 11,
             'rules': [
                 ('DocuscopeTagger..default.(2, 3).True.EXCL_!UNTAGGED.EXCL_!NORULES.EXCL_!EXCLUDED.ReasonForward',
                  (',', 'so')),
             ],
             'token_end_len': 2,
             'len': 1,
             'num_included_tokens': 2,
             'index_start': 9,
             'pos_end': 21,
             'pos_start': 19,
         },
         {
             'index_end': 13,
             'rules': [
                 ('DocuscopeTagger..default.(2, 3).True.EXCL_!UNTAGGED.EXCL_!NORULES.EXCL_!EXCLUDED.Negativity',
                  'wan'),
             ],
             'token_end_len': 3,
             'len': 1,
             'num_included_tokens': 1,
             'index_start': 13,
             'pos_end': 24,
             'pos_start': 24,
         },
         {
             'index_end': 17,
             'rules': [
                 ('DocuscopeTagger..default.(2, 3).True.EXCL_!UNTAGGED.EXCL_!NORULES.EXCL_!EXCLUDED.StandardsPos',
                  ('with', 'care')),
             ],
             'token_end_len': 4,
             'len': 1,
             'num_included_tokens': 2,
             'index_start': 15,
             'pos_end': 33,
             'pos_start': 28,
         },
     ]
     # The tag list starts with a None entry followed by the record list.
     self.tags = [None, tag_records]

     self.prepared_tokens = self.formatter.prepare_tokens(
         tokens=self.tokens,
         tags=self.tags,
     )

     self.tag_map = {
         1: 'Transformation',
         3: 'ReportingStates',
         4: 'ReasonForward',
         6: 'Negativity',
         8: 'StandardsPos',
     }
     self.pos_map = {
         0: 0, 1: 3, 2: 9,
         3: 13, 4: 19, 5: 23,
         6: 24, 7: 27, 8: 28,
     }
示例#4
0
 def setUp(self):
     """Paginate the integers 1 through 10 with a page size of 3."""
     self.pages = list(range(1, 11))
     self.page_size = 3
     self.formatter = HTMLFormatter()
     formatter = self.formatter
     self.paginated = formatter.paginate(self.pages, self.page_size)