Пример #1
0
def parse_json():
    """Parse every message in ``english.json`` and open an interactive console.

    Each entry of the loaded JSON is run through BeautifulSoup (``lxml``
    parser) to obtain its text, that text is passed to an
    ``EmailReplyParser`` created with ``language='en'``, and the results are
    collected in ``parsed``. An interactive interpreter is then started with
    this function's local variables so the results can be inspected by hand.
    """
    parser = EmailReplyParser(language='en')
    with open('english.json', 'rb') as fl:
        messages = json.load(fl)
    parsed = []
    for text in messages:
        # Get the text content of the (presumably HTML) message,
        # using '\n' as the separator passed to getText.
        soup = BeautifulSoup(text, 'lxml')
        text = soup.getText('\n')
        text = parser.parse_reply(text)
        parsed.append(text)
    # Debug aid: every local defined above (parser, messages, parsed, ...)
    # is visible by name inside the console.
    import code
    code.interact(local=locals())
Пример #2
0
def verify():
    """Print the parsed reply for every string entry of ``test/emails/emails.json``.

    Entries of the loaded JSON that are not strings are skipped. Each
    remaining entry is run through BeautifulSoup (``lxml`` parser) to get
    its text, which is then passed to an ``EmailReplyParser`` created with
    ``language='fi'``. A line of 100 dashes is printed before each entry is
    processed, and the parsed reply after it.

    Returns:
        None. Results are only written to standard output.
    """
    parser = EmailReplyParser(language='fi')
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open('test/emails/emails.json') as fl:
        texts = json.load(fl)
    texts = [d for d in texts if isinstance(d, str)]
    for text in texts:
        print('-' * 100)
        soup = BeautifulSoup(text, 'lxml')
        text = soup.getText('\n')
        text = parser.parse_reply(text)
        print(text)
Пример #3
0
def parse_df():
    """Add a ``clean`` column of parsed replies to ``zipwrotetest.csv``.

    Reads ``test/emails/zipwrotetest.csv``, runs every value of its
    ``sentence`` column through BeautifulSoup (``lxml`` parser) and an
    ``EmailReplyParser`` created with ``language='en'``, stores the results
    in a new ``clean`` column and writes the frame back to the same path.
    An interactive interpreter is then started with this function's local
    variables.
    """
    parser = EmailReplyParser(language='en')
    path = 'test/emails/zipwrotetest.csv'
    df = pd.read_csv(path)
    parsed = []
    for text in df.sentence.values:
        # Get the text content of the (presumably HTML) cell,
        # using '\n' as the separator passed to getText.
        soup = BeautifulSoup(text, 'lxml')
        text = soup.getText('\n')
        text = parser.parse_reply(text)
        parsed.append(text)
    df = df.assign(clean=parsed)
    # The input file is overwritten in place.
    # NOTE(review): to_csv is called without index=False, so the index is
    # presumably written as an extra column and would be read back as data
    # on the next run -- confirm this is intended.
    df.to_csv(path)
    # Debug aid: every local defined above (df, parsed, parser, ...) is
    # visible by name inside the console.
    import code
    code.interact(local=locals())
Пример #4
0
def profile():
    """Print rough timings for parsing the longest ``content`` entry of ``test.csv``.

    Three phases are timed separately and printed as ``<seconds><label>``:
    constructing an ``EmailReplyParser`` with ``language='fr'``, calling
    ``parse_reply`` on the longest ``content`` value, and extracting text
    from the result with BeautifulSoup (``lxml`` parser). A final line
    reports the total time elapsed since the CSV was loaded.

    Returns:
        None. Timings are only written to standard output.
    """
    df = pd.read_csv('test.csv')
    # perf_counter is a monotonic, high-resolution clock; time.time() is
    # wall-clock time and can jump if the system clock is adjusted.
    ground = time.perf_counter()
    # Longest entry; like argmax, max() keeps the first one on ties.
    content = max(df.content.values, key=len)
    start = time.perf_counter()
    parser = EmailReplyParser(language='fr')
    print(str(time.perf_counter() - start) + 'init parser')
    start = time.perf_counter()
    res = parser.parse_reply(content)
    print(str(time.perf_counter() - start) + 'parse')
    start = time.perf_counter()
    soup = BeautifulSoup(res, 'lxml')
    # Result is discarded; the call is made only to be timed.
    soup.getText(' ')
    print(str(time.perf_counter() - start) + 'soup')
    print(f'Total time: {time.perf_counter() - ground}')
Пример #5
0
def parse_text():
    """Print the reply parsed out of ``test/emails/caution.txt``.

    The file is read in full and handed to an ``EmailReplyParser`` created
    with ``language='en'``; whatever ``parse_reply`` returns is printed.
    """
    reply_parser = EmailReplyParser(language='en')
    with open('test/emails/caution.txt', 'r') as source:
        raw_message = source.read()
    print(reply_parser.parse_reply(raw_message))
 def test_email_one_is_not_on(self):
     # The quoted-reply header line must not appear in the parsed reply of
     # email_one_is_not_on.txt.
     with open('test/emails/email_one_is_not_on.txt') as email:
         reply = EmailReplyParser().parse_reply(email.read())
     # assertNotIn shows both operands on failure, unlike assertTrue, which
     # only reports "False is not true".
     self.assertNotIn("On Oct 1, 2012, at 11:55 PM, Dave Tapley wrote:", reply)
 def get_email(self, name):
     """Load a fixture from ``test/emails`` and parse it.

     ``name`` is the fixture's file name without the ``.txt`` extension.
     Returns the result of ``EmailReplyParser().read`` on the file's text
     (an EmailMessage instance, per the original docstring).
     """
     fixture_path = 'test/emails/%s.txt' % name
     with open(fixture_path) as fixture:
         contents = fixture.read()
     reply_parser = EmailReplyParser()
     return reply_parser.read(contents)
    def test_sent_from_iphone(self):
        # The "Sent from my iPhone" line must not appear in the parsed reply
        # of email_iPhone.txt.
        with open('test/emails/email_iPhone.txt') as email:
            reply = EmailReplyParser().parse_reply(email.read())
        # assertNotIn shows both operands on failure, unlike assertTrue,
        # which only reports "False is not true".
        self.assertNotIn("Sent from my iPhone", reply)
 def test_parse_out_just_top_for_outlook_with_unusual_headers_format(self):
     # Parsing email_2_3.txt must yield exactly the expected reply text.
     expected = "Outlook with a reply above headers using unusual format"
     with open('test/emails/email_2_3.txt') as fixture:
         raw = fixture.read()
         actual = EmailReplyParser().parse_reply(raw)
         self.assertEqual(expected, actual)
 def test_parse_out_just_top_for_outlook_with_reply_directly_above_line(self):
     # Parsing email_2_2.txt must yield exactly the expected reply text.
     expected = "Outlook with a reply directly above line"
     with open('test/emails/email_2_2.txt') as fixture:
         raw = fixture.read()
         actual = EmailReplyParser().parse_reply(raw)
         self.assertEqual(expected, actual)
 def test_parse_out_just_top_for_outlook_reply(self):
     # Parsing email_2_1.txt must yield exactly the expected reply text.
     expected = "Outlook with a reply"
     with open('test/emails/email_2_1.txt') as fixture:
         raw = fixture.read()
         actual = EmailReplyParser().parse_reply(raw)
         self.assertEqual(expected, actual)
 def test_reply_from_gmail(self):
     # Parsing email_gmail.txt must yield exactly the expected reply text.
     expected = 'This is a test for inbox replying to a github message.'
     with open('test/emails/email_gmail.txt') as fixture:
         raw = fixture.read()
         actual = EmailReplyParser().parse_reply(raw)
         self.assertEqual(expected, actual)