def test_split_content(self):
    """Check that the raw 2011-1-19 minutes fixture splits into two parts."""
    path = '{}{}'.format(base_resources, '2011-1-19raw.txt')
    with open(path, 'r') as f:
        # The explicit decode assumes read() hands back a byte string.
        text = f.read().decode('utf-8')

    # The decoded text is already a single string, so it is passed on as-is
    # (re-joining its characters would only produce an identical copy).
    split_doc = split_minutes_content(text)
    self.assertEqual(len(split_doc), 2)
Пример #2
0
    def run(self):
        """Write the first part of the split minutes to the output target.

        Reads the whole input target, decodes it as UTF-8, splits it with
        ``split_minutes_content`` and writes element 0 of the result,
        UTF-8 encoded, to the output target.
        """
        with self.input().open("r") as infile:
            # The explicit decode assumes read() hands back a byte string.
            text = infile.read().decode('utf8')

        split_doc = split_minutes_content(text)

        with self.output().open("w") as outfile:
            outfile.write(split_doc[0].encode("utf8"))
Пример #3
0
    def run(self):
        """Write the second part of the split minutes, without newlines.

        Reads the whole input target, decodes it as UTF-8, splits it with
        ``split_minutes_content``, deletes every newline character from
        element 1 of the result and writes it, UTF-8 encoded, to the
        output target.
        """
        with self.input().open("r") as infile:
            # The explicit decode assumes read() hands back a byte string.
            text = infile.read().decode('utf8')

        split_doc = split_minutes_content(text)

        # Mapping a code point to None makes translate() delete it.
        trans_table = {ord(u"\n"): None}
        sp_text = split_doc[1].translate(trans_table)

        with self.output().open("w") as outfile:
            outfile.write(sp_text.encode('utf8'))
    def test_split_discussion(self):
        """Check that the discussion part of the fixture yields > 100 statements.

        Splits the raw 2011-1-19 minutes fixture, strips newlines from
        element 1 of the result and runs ``split_statements_via_colon`` on
        it. The first statements (at most 50) are printed for inspection.
        """
        path = '{}{}'.format(base_resources, '2011-1-19raw.txt')
        with open(path, mode='r') as f:
            # The explicit decode assumes read() hands back a byte string.
            text = f.read().decode('utf8')

        split_doc = split_minutes_content(text)
        self.assertEqual(len(split_doc), 2)

        # Mapping a code point to None makes translate() delete it.
        trans_table = {ord(u"\n"): None}
        sp_text = split_doc[1].translate(trans_table)
        convos = split_statements_via_colon(sp_text)

        # Slice instead of indexing 0..49 so that a short result reaches the
        # assertion below rather than dying with an IndexError.
        for convo in convos[:50]:
            print(unicode(convo))
        self.assertGreater(len(convos), 100)