예제 #1
0
    def __init__(self, summary_problem, units='n2'):
        """Store the problem and choose the function that extracts units.

        Args:
            summary_problem: object kept as ``self.problem``; it is not
                inspected here.
            units: name of the unit type, also kept as ``self.unit_name``.
                'n1', 'n2', 'n3' and 'n4' select ``util.get_ngrams`` with
                ``n`` set to 1, 2, 3 or 4. 'su4' selects the result of
                ``util.get_skip_bigrams(x, k=4)`` joined with ``+`` to
                ``util.get_ngrams(x, n=1)``. Any other value falls back to
                ``util.get_ngrams`` called with its own default arguments.
        """
        self.unit_name = units
        self.problem = summary_problem

        if units == 'n1':
            self.unit_selector = lambda x: util.get_ngrams(x, n=1)
        elif units == 'n2':
            self.unit_selector = lambda x: util.get_ngrams(x, n=2)
        elif units == 'n3':
            self.unit_selector = lambda x: util.get_ngrams(x, n=3)
        elif units == 'n4':
            self.unit_selector = lambda x: util.get_ngrams(x, n=4)
        elif units == 'su4':
            self.unit_selector = lambda x: util.get_skip_bigrams(
                x, k=4) + util.get_ngrams(x, n=1)
        else:
            # Fallback for unrecognised names.  This used to rebind the local
            # ``units`` parameter, which left ``self.unit_selector`` undefined
            # and caused an AttributeError on first use.
            self.unit_selector = util.get_ngrams  # default options

        ## variables to set later
        self.concepts = None
        self.concept_weights = None
        self.concept_index = None
        self.relevant_sents = None
        self.relevant_sent_concepts = None

        ## defaults
        # NOTE(review): presumably a minimum sentence length and a cap on the
        # number of sentences -- confirm against the code that reads them.
        self.min_sent_length = 5
        self.max_sents = 10000
예제 #2
0
    def __init__(self, summary_problem, units='n2'):
        """Initialise the instance with a problem and a unit selector.

        Args:
            summary_problem: stored unchanged as ``self.problem``.
            units: unit-type name, stored as ``self.unit_name``. It picks
                the callable assigned to ``self.unit_selector``:
                'n1'..'n4' call ``util.get_ngrams`` with ``n`` = 1..4;
                'su4' adds (``+``) ``util.get_skip_bigrams(x, k=4)`` and
                ``util.get_ngrams(x, n=1)``; every other value uses
                ``util.get_ngrams`` with its default arguments.
        """
        self.unit_name = units
        self.problem = summary_problem

        if units == 'n1':
            self.unit_selector = lambda x: util.get_ngrams(x, n=1)
        elif units == 'n2':
            self.unit_selector = lambda x: util.get_ngrams(x, n=2)
        elif units == 'n3':
            self.unit_selector = lambda x: util.get_ngrams(x, n=3)
        elif units == 'n4':
            self.unit_selector = lambda x: util.get_ngrams(x, n=4)
        elif units == 'su4':
            self.unit_selector = (
                lambda x: util.get_skip_bigrams(x, k=4) + util.get_ngrams(x, n=1)
            )
        else:
            # The default must land on the instance.  The earlier code assigned
            # it to the local ``units`` name, so ``self.unit_selector`` was
            # never created for unknown unit names.
            self.unit_selector = util.get_ngrams  # default options

        ## variables to set later
        self.concepts = None
        self.concept_weights = None
        self.concept_index = None
        self.relevant_sents = None
        self.relevant_sent_concepts = None

        ## defaults
        # NOTE(review): meaning inferred from the names only (sentence-length
        # floor, sentence-count ceiling) -- verify where they are consumed.
        self.min_sent_length = 5
        self.max_sents = 10000
예제 #3
0
    def __init__(self, summary_problem, units='n2', df=None):
        
        self.unit_name = units
        self.problem = summary_problem
        self.df = df

        use_bounds = False
        if   units == 'n1': self.unit_selector = lambda x: util.get_ngrams(x, n=1, bounds=use_bounds)
        elif units == 'n2': self.unit_selector = lambda x: util.get_ngrams(x, n=2, bounds=use_bounds)
        elif units == 'n3': self.unit_selector = lambda x: util.get_ngrams(x, n=3, bounds=use_bounds)
        elif units == 'n12': self.unit_selector = lambda x: util.get_ngrams(x, n=1) + util.get_ngrams(x, n=2)
        elif units == 'n23': self.unit_selector = lambda x: util.get_ngrams(x, n=2) + util.get_ngrams(x, n=3)
        elif units == 's2' : self.unit_selector = lambda x: util.get_skip_bigrams(x, k=4) + util.get_ngrams(x, n=1)
        else: units = util.get_ngrams  # default options

        ## variables to set later
        self.concept_sets = None
        self.concept_weight_sets = None
        self.concept_index_sets = None
        self.relevant_sent_sets = None
        self.relevant_sent_concepts = None

        ## defaults
        self.min_sent_length = 5