示例#1
0
def test_person_summary():
    s = Summarizer()

    people = [
        {
            "gender": "F",
            "image": "https://example.com/image1",
            "party": [{"name": "Democratic"}, {"name": "Democratic", "end_date": "1990"}],
        },
        {
            "gender": "F",
            "image": "https://example.com/image2",
            "party": [{"name": "Democratic"}, {"name": "Working Families"}],
            "extras": {"religion": "Zoroastrian"},
            "contact_details": [{"fax": "123-456-7890", "note": "Capitol Office"}],
            "other_identifiers": [{"scheme": "fake", "identifier": "abc"}],
            "ids": {"twitter": "fake"},
        },
        {
            "gender": "M",
            "image": "https://example.com/image3",
            "party": [{"name": "Republican"}],
            "contact_details": [{"phone": "123-456-7890", "note": "Capitol Office"}],
            "other_identifiers": [{"scheme": "fake", "identifier": "123"}],
        },
    ]

    for p in people:
        s.summarize(p)

    assert s.parties == {"Republican": 1, "Democratic": 2, "Working Families": 1}
    assert s.contact_counts == {"Capitol Office phone": 1, "Capitol Office fax": 1}
    assert s.id_counts == {"fake": 2, "twitter": 1}
    assert s.optional_fields == {"gender": 3, "image": 3}
    assert s.extra_counts == {"religion": 1}
示例#2
0
  def test_summarize_multipe(self):
    args = '{"output_ids": [], \
      "ppg_ids": ["DEF", "LTFL", "LP61"], \
      "problem_objective_ids": ["FGH"], \
      "goal_ids": ["EFG"], \
      "operation_ids": ["BEN", "LISA"], \
      "report_type": "Mid Year Report",  \
      "year": 2013 }'

    s = Summarizer(env = 'test', args = args)

    summary = s.summarize()
    assert summary.strip() == 'the quick brown fox. the second quick brown fox.'
示例#3
0
  def test_query(self):
    args = '{"output_ids": [], \
      "ppg_ids": ["DEF", "LTFL", "LP61"], \
      "problem_objective_ids": ["FGH"], \
      "goal_ids": ["EFG"], \
      "operation_ids": ["BEN", "7VC"], \
      "report_type": "Mid Year Report",  \
      "year": 2013 }'

    s = Summarizer(env = 'test', args = args)

    text = s.query()

    assert text.strip() == 'the quick brown fox.'
示例#4
0
    def test_query(self):
        args = '{"output_ids": [], \
      "ppg_ids": ["DEF", "LTFL", "LP61"], \
      "problem_objective_ids": ["FGH"], \
      "goal_ids": ["EFG"], \
      "operation_ids": ["BEN", "7VC"], \
      "report_type": "Mid Year Report",  \
      "year": 2013 }'

        s = Summarizer(env='test', args=args)

        text = s.query()

        assert text.strip() == 'the quick brown fox.'
示例#5
0
    def test_summarize_multipe(self):
        args = '{"output_ids": [], \
      "ppg_ids": ["DEF", "LTFL", "LP61"], \
      "problem_objective_ids": ["FGH"], \
      "goal_ids": ["EFG"], \
      "operation_ids": ["BEN", "LISA"], \
      "report_type": "Mid Year Report",  \
      "year": 2013 }'

        s = Summarizer(env='test', args=args)

        summary = s.summarize()
        assert summary.strip(
        ) == 'the quick brown fox. the second quick brown fox.'
示例#6
0
    def test_summarize_large(self):
        args = '{"output_ids": [], \
      "ppg_ids": ["DEF", "LTFL", "LP61"], \
      "problem_objective_ids": ["FGH"], \
      "goal_ids": ["EFG"], \
      "operation_ids": ["BEN", "LISA", "JEFF"], \
      "report_type": "Mid Year Report",  \
      "year": 2013 }'

        max_chars = 500

        s = Summarizer(env='test', args=args, max_chars=max_chars)

        summary = s.summarize()
        assert len(summary) <= max_chars
示例#7
0
  def test_summarize_large(self):
    args = '{"output_ids": [], \
      "ppg_ids": ["DEF", "LTFL", "LP61"], \
      "problem_objective_ids": ["FGH"], \
      "goal_ids": ["EFG"], \
      "operation_ids": ["BEN", "LISA", "JEFF"], \
      "report_type": "Mid Year Report",  \
      "year": 2013 }'

    max_chars = 500

    s = Summarizer(env = 'test', args = args, max_chars = max_chars)

    summary = s.summarize()
    assert len(summary) <= max_chars
    def test_get_optimal_subset(self):  # takes about .9 seconds
        with open('sample_text.txt', encoding="utf8") as text_file:

            # Load text
            sample_text = str(text_file.read())

            # Remove unwanted text in the essay. ie '(fr)'
            sample_text = sample_text.replace('(fr) ', '')

            # Load document class
            summarizer = Summarizer(text=sample_text)

            # get summary
            val, summary = summarizer.get_optimal_subset(2500, ret_as="str")

            # Write out summary
            with open('output.txt', 'w+') as w_file:
                w_file.write(summary)
示例#9
0
  def test_query_string(self):

    args = '{"output_ids": [], \
      "ppg_ids": ["LRZQ", "LTFL", "LP61"], \
      "problem_objective_ids": ["c70f5d80-a7cd-4d68-a085-aa04702c0fea"], \
      "goal_ids": ["EM"], \
      "operation_ids": ["BEN", "7VC"], \
      "report_type": "Mid Year Report",  \
      "year": 2013 }'

    s = Summarizer(env = 'test', args = args)

    assert len(s.args['operation_ids']) == 2
    assert len(s.args['output_ids']) == 0
    assert len(s.args['ppg_ids']) == 3
    assert int(s.args['year']) == 2013
    assert s.args['report_type'] == 'Mid Year Report'

    query_string = s.query_string()

    assert s.args['report_type'] in query_string
    assert str(s.args['year']) in query_string
    assert s.args['goal_ids'][0] in query_string
    assert s.args['operation_ids'][1] in query_string
示例#10
0
    def test_query_string(self):

        args = '{"output_ids": [], \
      "ppg_ids": ["LRZQ", "LTFL", "LP61"], \
      "problem_objective_ids": ["c70f5d80-a7cd-4d68-a085-aa04702c0fea"], \
      "goal_ids": ["EM"], \
      "operation_ids": ["BEN", "7VC"], \
      "report_type": "Mid Year Report",  \
      "year": 2013 }'

        s = Summarizer(env='test', args=args)

        assert len(s.args['operation_ids']) == 2
        assert len(s.args['output_ids']) == 0
        assert len(s.args['ppg_ids']) == 3
        assert int(s.args['year']) == 2013
        assert s.args['report_type'] == 'Mid Year Report'

        query_string = s.query_string()

        assert s.args['report_type'] in query_string
        assert str(s.args['year']) in query_string
        assert s.args['goal_ids'][0] in query_string
        assert s.args['operation_ids'][1] in query_string
示例#11
0
        tk.withdraw()
        copied_text = tk.clipboard_get()

        subj, text = body_from_website(copied_text.lower())
        if text == '':
            if len(copied_text) > 100:
                text = copied_text
                subj = ''
            else:
                print(f'Invalid Input: {copied_text}')
                input('Press enter to continue...')
                sys.exit()
        else:
            print(f'Loading text from: {copied_text}')

    document = Summarizer(text)

    _, summary = document.get_optimal_subset_by_percent_words(.15,
                                                              ret_as='str')

    # Generate response

    response_body = 'Title: ' + subj + '\n\n'
    if len(summary) != 0:
        response_body += summary
    else:
        response_body += 'Text Too Short'
    response_body += '\n\n--------------\n\n'

    print(response_body)
示例#12
0
import pickle

from pathlib import Path
from summa import summarizer
from summarize import Summarizer


if __name__ == '__main__':

	DATA_PATH = Path('data')
	reviewtext = pd.read_pickle(DATA_PATH/'df_processed_reviews.p')
	idx2sent = pickle.load(open(DATA_PATH/'idx2sent.p', 'rb'))
	sent2vec = pickle.load(open(DATA_PATH/'sent2vec.p', 'rb'))

	idx = 198
	my_summarizer = Summarizer(reviewtext, idx2sent, sent2vec)
	review, my_summary = my_summarizer.summarize(idx, n=100)
	summary = summarizer.summarize(review, ratio=0.2)
示例#13
0
            continue

        print(f'Emails to process: {emails}')
        sender, subject, body = email_client.get_email_data(
            emails.split(' ')[0])
        print(f'Email received from: {sender}')

        try:
            documents = []
            # Load document class with body text
            if subject.lower() == 'summarize: url':
                for url in re.split(r'[ \t\n\r]+', str(body)):
                    if url != '':
                        subj, text = body_from_website(url.lower())
                        if text != '':
                            documents.append(Summarizer(text))
            else:
                documents.append(Summarizer(str(body)))

            # Find summary, that is twenty percent the size
            summaries = []
            for document in documents:
                summaries.append(
                    document.get_optimal_subset_by_percent_words(
                        .15, ret_as='str')[1])

            # Create ew email body
            response_body = ''
            for i in range(len(documents)):
                response_body += 'Title: ' + subject
                response_body += '\n\n'
示例#14
0
from django.shortcuts import render, redirect
from django.http import HttpResponse

from app.util import ExamplePicker
from app.models import Eval

from summarize import Summarizer

import time
import json
import os
from pathlib import Path

project_root = Path(os.getcwd())

summarizer = Summarizer()
article_picker = ExamplePicker(data_path=project_root.joinpath('data').as_posix())


def demo(request):
    return_object = {
        'article': None,
        'summary': None,
        'options': ["pgn", "textrank"]
    }

    if request.method == 'GET':
        article = ""  # article selection module

        return_object['article'] = article_picker.pick_random_article()
示例#15
0
from summarize import Summarizer

argentina_articles = [
    "argentina/argentina-guardian.txt", "argentina/argentina-nyt.txt"
]
china_articles = ["china/china-cnn.txt", "china/china-nyt.txt"]
climate_articles = ["climate/climate-npr.txt", "climate/climate-nyt.txt"]
VW_articles = ["VW/VW-ars.txt", "VW/VW-nyt.txt"]
Iran_articles = ["iran/iran.txt"]

magic = Summarizer(Iran_articles)
print(magic.generate_summaries())
示例#16
0
    def test_setup(self):
        s = Summarizer(env='test')

        assert s.env == 'test'
        assert s.language == 'english'
示例#17
0
# driver.py
# Luke Reichold - CSCI 4930
from summarize import Summarizer


argentina_articles = ["argentina/argentina-guardian.txt", "argentina/argentina-nyt.txt"]
china_articles = ["china/china-cnn.txt", "china/china-nyt.txt"]
climate_articles = ["climate/climate-npr.txt", "climate/climate-nyt.txt"]
VW_articles = ["VW/VW-ars.txt", "VW/VW-nyt.txt"]


magic = Summarizer(VW_articles)
print(magic.generate_summaries())