Python scrap_body示例，processing.load_body.scrap_body Python示例

示例#1

0

显示文件

文件： test_load_body.py 项目： brunousml/articles_meta

    def test_scrapt_body(self):
        """Expect the inner markup of the "index,en" div when asking for 'en'."""
        expected = '<div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p>'
        html = u"""<html><header></header><body><div class="content"><div class="index,en"><div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p></div></div></body></html>"""

        scraped = load_body.scrap_body(html, 'en')

        self.assertEqual(scraped, expected)

示例#2

0

显示文件

    def test_scrapt_body_not_found_for_a_given_language(self):
        """Expect None when the document has no block for the requested language.

        The markup only carries an "index,en" block and 'pt' is requested.
        The input is handed over as UTF-8 encoded bytes.
        """
        data = u"""<html><header></header><body><div class="content"><div class="index,en"><div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p></div></div></body></html>"""

        result = load_body.scrap_body(data.encode('utf-8'), 'pt')

        # Identity check: None is a singleton, so do not rely on __eq__.
        self.assertIsNone(result)

示例#3

0

显示文件

文件： test_load_body.py 项目： javani/articles_meta

    def test_scrapt_body_not_found(self):
        """Expect None when the "content" div is empty."""
        data = u"""<html><header></header><body><div class="content"></div></body></html>"""

        result = load_body.scrap_body(data, 'pt')

        # Identity check: None is a singleton, so do not rely on __eq__.
        self.assertIsNone(result)

示例#4

0

显示文件

文件： test_load_body.py 项目： javani/articles_meta

    def test_scrapt_body_not_found_for_a_given_language(self):
        """Expect None when the document has no block for the requested language.

        The markup only carries an "index,en" block and 'pt' is requested.
        """
        data = u"""<html><header></header><body><div class="content"><div class="index,en"><div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p></div></div></body></html>"""

        result = load_body.scrap_body(data, 'pt')

        # Identity check: None is a singleton, so do not rely on __eq__.
        self.assertIsNone(result)

示例#5

0

显示文件

文件： test_load_body.py 项目： javani/articles_meta

    def test_scrapt_body(self):
        """Expect the inner markup of the "index,en" div when asking for 'en'."""
        expected = '<div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p>'
        html = u"""<html><header></header><body><div class="content"><div class="index,en"><div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p></div></div></body></html>"""

        scraped = load_body.scrap_body(html, 'en')

        self.assertEqual(scraped, expected)

示例#6

0

显示文件

    def test_scrapt_body_not_found(self):
        """Expect None when the "content" div is empty.

        The input is handed over as UTF-8 encoded bytes.
        """
        data = u"""<html><header></header><body><div class="content"></div></body></html>"""

        result = load_body.scrap_body(data.encode('utf-8'), 'pt')

        # Identity check: None is a singleton, so do not rely on __eq__.
        self.assertIsNone(result)

示例#7

0

显示文件

文件： test_load_body.py 项目： brunousml/articles_meta

    def test_body_sample_7(self):
        """Scrape the body_sample_7.html fixture for language 'en'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_7.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'en')

        # Text at the beginning of the document
        self.assertIn(u'caso da bacia    do Amazonas', result)
        # Text at the end of the document
        self.assertIn(u'com o Embasamento. Universidade Federal', result)

示例#8

0

显示文件

文件： test_load_body.py 项目： brunousml/articles_meta

    def test_body_sample_6(self):
        """Scrape the body_sample_6.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_6.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Editorial', result)
        # Text at the end of the document
        self.assertIn(u'Boa leitura!', result)

示例#9

0

显示文件

文件： test_load_body.py 项目： brunousml/articles_meta

    def test_body_sample_5(self):
        """Scrape the body_sample_5.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_5.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Molestia de Carlos Chagas', result)
        # Text at the end of the document
        self.assertIn(u'Full text available only in PDF format', result)

示例#10

0

显示文件

文件： test_load_body.py 项目： brunousml/articles_meta

    def test_body_sample_4(self):
        """Scrape the body_sample_4.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_4.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Aquarelas de um Brasil', result)
        # Text at the end of the document
        self.assertIn(u'São Paulo, Companhia das Letras.', result)

示例#11

0

显示文件

文件： test_load_body.py 项目： brunousml/articles_meta

    def test_body_sample_3(self):
        """Scrape the body_sample_3.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_3.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'A TRIBUTAÇÃO NA PRODUÇÃO DE CARVÃO VEGETAL', result)
        # Text at the end of the document
        self.assertIn(u'Recebido: 03 de Fevereiro de 2012; Aceito: 14 de Abril de 2014', result)

示例#12

0

显示文件

文件： test_load_body.py 项目： brunousml/articles_meta

    def test_body_sample_2(self):
        """Scrape the body_sample_2.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_2.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'meio para isolamento de', result)
        # Text at the end of the document
        self.assertIn(u'Recebido    para publicação em 31-7-1967', result)

示例#13

0

显示文件

文件： test_load_body.py 项目： javani/articles_meta

    def test_body_sample_4(self):
        """Scrape the body_sample_4.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_4.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Aquarelas de um Brasil', result)
        # Text at the end of the document
        self.assertIn(u'São Paulo, Companhia das Letras.', result)

示例#14

0

显示文件

文件： test_load_body.py 项目： javani/articles_meta

    def test_body_sample_2(self):
        """Scrape the body_sample_2.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_2.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'meio para isolamento de', result)
        # Text at the end of the document
        self.assertIn(u'Recebido    para publicação em 31-7-1967', result)

示例#15

0

显示文件

文件： test_load_body.py 项目： javani/articles_meta

    def test_body_sample_7(self):
        """Scrape the body_sample_7.html fixture for language 'en'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_7.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'en')

        # Text at the beginning of the document
        self.assertIn(u'caso da bacia    do Amazonas', result)
        # Text at the end of the document
        self.assertIn(u'com o Embasamento. Universidade Federal', result)

示例#16

0

显示文件

文件： test_load_body.py 项目： javani/articles_meta

    def test_body_sample_6(self):
        """Scrape the body_sample_6.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_6.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Editorial', result)
        # Text at the end of the document
        self.assertIn(u'Boa leitura!', result)

示例#17

0

显示文件

文件： test_load_body.py 项目： javani/articles_meta

    def test_body_sample_5(self):
        """Scrape the body_sample_5.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_5.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Molestia de Carlos Chagas', result)
        # Text at the end of the document
        self.assertIn(u'Full text available only in PDF format', result)

示例#18

0

显示文件

文件： test_load_body.py 项目： brunousml/articles_meta

    def test_body_sample_1(self):
        """Scrape the body_sample_1.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_1.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'On the one pot syntheses', result)
        # Text at the end of the document
        self.assertIn(u'Web Release Date: November 26, 2009', result)

示例#19

0

显示文件

文件： test_load_body.py 项目： javani/articles_meta

    def test_body_sample_1(self):
        """Scrape the body_sample_1.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_1.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'On the one pot syntheses', result)
        # Text at the end of the document
        self.assertIn(u'Web Release Date: November 26, 2009', result)

示例#20

0

显示文件

文件： test_load_body.py 项目： javani/articles_meta

    def test_body_sample_3(self):
        """Scrape the body_sample_3.html fixture for language 'pt'.

        The fixture lines are stripped and joined with single spaces before
        scraping; the result must contain one snippet expected near the start
        of the document and one expected near its end.
        """
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty, instead of producing the root-anchored '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_3.html')
        # Close the fixture deterministically rather than leaving it to the GC.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            data = ' '.join([i.strip() for i in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'A TRIBUTAÇÃO NA PRODUÇÃO DE CARVÃO VEGETAL', result)
        # Text at the end of the document
        self.assertIn(u'Recebido: 03 de Fevereiro de 2012; Aceito: 14 de Abril de 2014', result)