def testExtractTextInfo(self):
    """Check the text tags that extractTextInfo produces for self.document.

    The result is expected to hold a single entry keyed 'f' whose value is
    a sequence of eleven TextTag objects, compared one by one below.
    """
    textTags = hrefliterals.extractTextInfo(self.document, self.exclude_re, '')
    # list(...) keeps the comparison valid where keys() returns a view
    # object (Python 3) instead of a list; a view never equals a list.
    self.assertEqual(list(textTags.keys()), ['f'])

    word = hrefliterals.TextTag.Type.WORD
    char = hrefliterals.TextTag.Type.CHARACTER
    # One row per expected tag: (type, int, int, text). The two integers
    # are presumably start/end offsets into the source -- TODO confirm.
    expected = [
        (word, 29, 34, 'words'),
        (char, 34, 35, ' '),
        (char, 35, 36, '1'),
        (char, 36, 37, ' '),
        (word, 37, 41, 'word'),
        (char, 41, 42, '2'),
        (char, 42, 43, ' '),
        (word, 43, 72, 'word'),
        (char, 72, 73, ' '),
        (char, 123, 124, ' '),
        (char, 150, 151, ' '),
    ]

    tags = textTags['f']
    self.assertEqual(len(tags), len(expected))
    # Compare in order so the first mismatching position is the one reported.
    for index, (kind, start, end, text) in enumerate(expected):
        self.assertEqual(tags[index],
                         hrefliterals.TextTag(kind, start, end, text))
def testFindLiterals(self):
    """Check that findLiterals reports three LITERAL tags for self.document.

    The tags extracted under key 'f' are searched for the literals 'word'
    and 'W.O.R.D.2.'; the three resulting tags are compared in order.
    """
    tags_by_key = hrefliterals.extractTextInfo(
        self.document, self.exclude_re, '')
    wanted = dict.fromkeys(('word', 'W.O.R.D.2.'))
    found = hrefliterals.findLiterals(
        tags_by_key['f'], wanted, {}, self.words, self.stemmer, 0)

    self.assertEqual(len(found), 3)
    # One row per expected tag: (int, int, literal text); the integers are
    # presumably start/end offsets -- TODO confirm.
    expected = (
        (29, 34, 'word'),
        (37, 42, 'W.O.R.D.2.'),
        (43, 72, 'word'),
    )
    for position, (start, end, text) in enumerate(expected):
        self.assertEqual(
            found[position],
            hrefliterals.TextTag(
                hrefliterals.TextTag.Type.LITERAL, start, end, text))
def testExtractTextInfo(self):
    """Verify the TextTag sequence extracted from self.document.

    extractTextInfo is expected to return exactly one entry, keyed 'f',
    holding eleven TextTag objects that match the table below in order.
    """
    textTags = hrefliterals.extractTextInfo(self.document, self.exclude_re, '')
    # Materialise the keys: on Python 3 keys() is a view and would never
    # compare equal to a list, so the bare comparison only works on Python 2.
    self.assertEqual(list(textTags.keys()), ['f'])

    tag_type = hrefliterals.TextTag.Type
    # Each row is (type, int, int, text); the integers look like start/end
    # positions in the source text -- TODO confirm against TextTag.
    expected_rows = (
        (tag_type.WORD, 29, 34, 'words'),
        (tag_type.CHARACTER, 34, 35, ' '),
        (tag_type.CHARACTER, 35, 36, '1'),
        (tag_type.CHARACTER, 36, 37, ' '),
        (tag_type.WORD, 37, 41, 'word'),
        (tag_type.CHARACTER, 41, 42, '2'),
        (tag_type.CHARACTER, 42, 43, ' '),
        (tag_type.WORD, 43, 72, 'word'),
        (tag_type.CHARACTER, 72, 73, ' '),
        (tag_type.CHARACTER, 123, 124, ' '),
        (tag_type.CHARACTER, 150, 151, ' '),
    )

    extracted = textTags['f']
    # The length check runs first, so the indexing below cannot run past
    # the end of the extracted sequence.
    self.assertEqual(len(extracted), len(expected_rows))
    for position, row in enumerate(expected_rows):
        self.assertEqual(extracted[position], hrefliterals.TextTag(*row))
def testReplaceTags(self):
    """Check that replaceLiterals substitutes each literal tag's value.

    The three literal tags found in self.document get the values '*',
    '**' and '***'; the rewritten self.source must equal the expected text.
    """
    tags_by_key = hrefliterals.extractTextInfo(
        self.document, self.exclude_re, '')
    wanted = dict.fromkeys(('word', 'W.O.R.D.2.'))
    found = hrefliterals.findLiterals(
        tags_by_key['f'], wanted, {}, self.words, self.stemmer, 0)

    # Give every tag a distinct marker so each replacement is identifiable.
    for position, marker in enumerate(('*', '**', '***')):
        found[position].value = marker

    actual = hrefliterals.replaceLiterals(self.source, found)
    # NOTE(review): compared character for character, so the whitespace
    # inside this literal is significant -- confirm against self.source.
    expected = r""" \begin{document}* 1 ** *** \begin{equation}word\end{equation} \end{document} """
    self.assertEqual(actual, expected)
def testReplaceTags(self):
    """Replace the three literal tags in self.source and compare the text.

    Tags are taken from the 'f' entry of extractTextInfo, matched against
    the literals 'word' and 'W.O.R.D.2.', then given star markers as values.
    """
    extracted = hrefliterals.extractTextInfo(self.document, self.exclude_re, '')
    literal_names = {'W.O.R.D.2.': None}
    literal_names['word'] = None
    matches = hrefliterals.findLiterals(
        extracted['f'], literal_names, {}, self.words, self.stemmer, 0)

    # Targets are assigned left to right, matching one-by-one assignment.
    matches[0].value, matches[1].value, matches[2].value = '*', '**', '***'

    # NOTE(review): assertEqual compares the strings exactly, so the
    # whitespace inside this literal matters -- confirm against self.source.
    expected_source = r""" \begin{document}* 1 ** *** \begin{equation}word\end{equation} \end{document} """
    self.assertEqual(
        hrefliterals.replaceLiterals(self.source, matches), expected_source)
def testFindLiterals(self):
    """Verify the LITERAL tags findLiterals returns for self.document.

    Exactly three tags are expected: 'word', 'W.O.R.D.2.' and 'word' again,
    each compared against a TextTag built with the values listed below.
    """
    def literal_tag(start, end, text):
        # Build the expected tag; the integers are presumably start/end
        # offsets into the source -- TODO confirm.
        return hrefliterals.TextTag(
            hrefliterals.TextTag.Type.LITERAL, start, end, text)

    extracted = hrefliterals.extractTextInfo(self.document, self.exclude_re, '')
    literal_names = {'word': None}
    literal_names['W.O.R.D.2.'] = None
    matches = hrefliterals.findLiterals(
        extracted['f'], literal_names, {}, self.words, self.stemmer, 0)

    self.assertEqual(len(matches), 3)
    self.assertEqual(matches[0], literal_tag(29, 34, 'word'))
    self.assertEqual(matches[1], literal_tag(37, 42, 'W.O.R.D.2.'))
    self.assertEqual(matches[2], literal_tag(43, 72, 'word'))
def findLiterals(self, source, literals, notLiterals):
    """Parse `source` and return the literal tags found in its text.

    `source` is wrapped in an in-memory stream and parsed under the name
    'f' with the current working directory; the text tags extracted for
    'f' are then passed to hrefliterals.findLiterals along with
    `literals`, `notLiterals`, self.words and self.stemmer.
    """
    stream = StringIO.StringIO(source)
    parsed = hrefliterals.parseDocument('f', stream, os.getcwd())
    tags_by_key = hrefliterals.extractTextInfo(parsed, self.exclude_re, '')
    # NOTE(review): the meaning of the trailing 0 is not visible from
    # here -- check hrefliterals.findLiterals.
    return hrefliterals.findLiterals(
        tags_by_key['f'],
        literals,
        notLiterals,
        self.words,
        self.stemmer,
        0,
    )