def generate_phrase(self, pool):
    """Build a comparison phrase from the grammar and the word pool.

    A random adjective is drawn from ``pool.adjectives`` and a random noun
    from ``pool.comparisons[adj]``.  The expansion of the grammar's start
    symbol returned by ``self.produce`` then has every ``adj``, ``n`` and
    ``det`` placeholder substituted through ``self.replace_pos``.

    Returns the phrase with every item wrapped as a ``Word``, or ``None``
    when any step raises.
    """
    try:
        adj = choice(list(pool.adjectives))
        parser = ChartParser(self.grammar)
        gr = parser.grammar()
        phrase = self.produce(gr, gr.start())
        noun = choice(list(pool.comparisons[adj]))
        # A noun whose plural form equals its own name gets "the";
        # otherwise the first token of en.noun.article() is used.
        if en.noun.plural(noun.name) == noun.name:
            article = "the"
        else:
            article = en.noun.article(noun.name).split(" ")[0]
        replace_words = {'adj': adj, 'n': noun, 'det': article}
        for pos, replacement in replace_words.items():
            # replace_pos is called until no placeholder of this kind is left.
            while pos in phrase:
                phrase = self.replace_pos(pos, replacement, phrase)
        # Replace by position: looking the item up again with index() is
        # quadratic and can land on a different, equal element.
        for i, w in enumerate(phrase):
            if not isinstance(w, Word):
                phrase[i] = Word(w)
        return phrase
    except Exception:
        # Best-effort generation: failures yield None, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
 def generate_phrase(self, pool):
     """Expand the grammar and fill its placeholders with words from *pool*.

     A noun is drawn at random from ``pool.nouns``.  ``replace_words`` maps
     each placeholder symbol to its list of candidate words, and every
     occurrence of a placeholder in the phrase is replaced (through
     ``self.replace_pos``) by a randomly chosen candidate.

     Returns the phrase with every item wrapped as a ``Word``, or ``None``
     when the candidates cannot be built or a replacement fails.
     """
     parser = ChartParser(self.grammar)
     gr = parser.grammar()
     phrase = self.produce(gr, gr.start())
     noun = choice(list(pool.nouns))
     try:
         replace_words = {
             'n': [noun],
             'v': [Word(self.conjugate(v.name))
                   for v in list(pool.verbs[noun])],
             'adj': pool.epithets[noun],
             'atv': [Word(self.conjugate(v, self.person))
                     for v in self.atv],
             'eva': [Word(self.conjugate(v, self.person))
                     for v in self.eva],
             'ej': pool.emotional_adjectives,
             'en': pool.emotional_nouns,
             'erb': pool.emotional_adverbs,
             'person': [Word(self.persons[self.person][0])],
             'pron': [Word(self.persons[self.person][1])],
         }
     except Exception:
         # Candidates could not be assembled for this noun: give up quietly,
         # without swallowing KeyboardInterrupt/SystemExit.
         return None
     for pos, candidates in replace_words.items():
         while pos in phrase:
             try:
                 word = choice(candidates)
                 phrase = self.replace_pos(pos, word, phrase)
             except Exception:
                 return None
     # Replace by position: looking the item up again with index() is
     # quadratic and can land on a different, equal element.
     for i, w in enumerate(phrase):
         if not isinstance(w, Word):
             phrase[i] = Word(w)
     return phrase
 def generate_phrase(self, pool):
     """Expand the grammar and fill its placeholders with words from *pool*.

     A noun is drawn at random from ``pool.nouns``.  ``replace_words`` maps
     each placeholder symbol to its list of candidate words, and every
     occurrence of a placeholder in the phrase is replaced (through
     ``self.replace_pos``) by a randomly chosen candidate.

     Returns the phrase with every item wrapped as a ``Word``, or ``None``
     when the candidates cannot be built or a replacement fails.
     """
     parser = ChartParser(self.grammar)
     gr = parser.grammar()
     phrase = self.produce(gr, gr.start())
     noun = choice(list(pool.nouns))
     try:
         replace_words = {
             'n': [noun],
             'v':
             [Word(self.conjugate(v.name)) for v in list(pool.verbs[noun])],
             'adj': pool.epithets[noun],
             'atv':
             [Word(self.conjugate(v, self.person)) for v in self.atv],
             'eva':
             [Word(self.conjugate(v, self.person)) for v in self.eva],
             'ej': pool.emotional_adjectives,
             'en': pool.emotional_nouns,
             'erb': pool.emotional_adverbs,
             'person': [Word(self.persons[self.person][0])],
             'pron': [Word(self.persons[self.person][1])]
         }
     except Exception:
         # Candidates could not be assembled for this noun: give up quietly,
         # without swallowing KeyboardInterrupt/SystemExit.
         return None
     for pos, candidates in replace_words.items():
         while pos in phrase:
             try:
                 word = choice(candidates)
                 phrase = self.replace_pos(pos, word, phrase)
             except Exception:
                 return None
     # Replace by position: looking the item up again with index() is
     # quadratic and can land on a different, equal element.
     for i, w in enumerate(phrase):
         if not isinstance(w, Word):
             phrase[i] = Word(w)
     return phrase
Пример #4
0
def generate_name(G):
    """Return a title-cased name generated from the grammar text *G*.

    *G* is handed to ``CFG.fromstring``.  The tokens that ``produce`` yields
    for the grammar's start symbol are concatenated without a separator.
    """
    chart_parser = ChartParser(CFG.fromstring(G))
    cfg = chart_parser.grammar()
    pieces = produce(cfg, cfg.start())
    return ''.join(pieces).title()
Пример #5
0
def generate_entities_question(attr, entities, phase):
    """Build the question record for an entity attribute.

    The attribute name is looked up with ``get_attribute_name`` and, together
    with *phase*, passed to ``generate_entities_grammar``.  The question text
    is the space-joined expansion of that grammar's start symbol.

    Returns a dict with the keys ``text``, ``answer``, ``questionId``,
    ``attrId`` (set to *attr*) and ``topicId`` (always 3).
    """
    entity_name = get_attribute_name(attr, entities)
    cfg = ChartParser(generate_entities_grammar(entity_name, phase)).grammar()
    text = ' '.join(produce(cfg, cfg.start()))
    return {
        'text': text,
        'answer': 0,
        'questionId': 0,
        'attrId': attr,
        'topicId': 3,
    }
Пример #6
0
def generate_impacts_question(attr, impacts, phase):
    """Build the question record for an impact attribute.

    The attribute name is looked up with ``get_attribute_name`` and, together
    with *phase*, passed to ``generate_impacts_grammar``.  The question text
    is the space-joined expansion of that grammar's start symbol.

    Returns a dict with the keys ``text``, ``answer``, ``questionId``,
    ``attrId`` (set to *attr*) and ``topicId`` (always 4).
    """
    impact_name = get_attribute_name(attr, impacts)
    cfg = ChartParser(generate_impacts_grammar(impact_name, phase)).grammar()
    text = ' '.join(produce(cfg, cfg.start()))
    return {
        'text': text,
        'answer': 0,
        'questionId': 0,
        'attrId': attr,
        'topicId': 4,
    }
Пример #7
0
def generate_sources_question(attr, parent_attr, sources, phase):
    """Build the question record for a source attribute.

    The attribute name is looked up with ``get_attribute_name`` and passed
    through ``analyze_numerus``.  When *parent_attr* is not ``None`` its name
    is looked up as well.  Both are handed, with *phase*, to
    ``generate_sources_grammar``, and the question text is the space-joined
    expansion of that grammar's start symbol.

    Returns a dict with the keys ``text``, ``answer``, ``questionId``,
    ``attrId`` (set to *attr*) and ``topicId`` (always 1).
    """
    source_name = analyze_numerus(get_attribute_name(attr, sources))
    parent_name = (None if parent_attr is None
                   else get_attribute_name(parent_attr, sources))
    cfg = ChartParser(
        generate_sources_grammar(source_name, parent_name, phase)).grammar()
    text = ' '.join(produce(cfg, cfg.start()))
    return {
        'text': text,
        'answer': 0,
        'questionId': 0,
        'attrId': attr,
        'topicId': 1,
    }
Пример #8
0
    def generate_phrase(self):
        """Build a comparison phrase from the blackboard's word pool.

        An adjective is drawn at random from the keys of
        ``self.blackboard.pool.comparisons`` that have at least one entry,
        and a noun from that adjective's entries.  The expansion of the
        grammar's start symbol then has every ``adj``, ``n`` and ``det``
        placeholder substituted through ``self.replace_pos``.

        Returns the phrase with every item wrapped as a ``Word``, or ``None``
        when a replacement fails.
        """
        comparisons = self.blackboard.pool.comparisons
        adj = choice([a for a in comparisons if len(comparisons[a]) > 0])
        parser = ChartParser(self.grammar)
        gr = parser.grammar()
        phrase = self.produce(gr, gr.start())
        noun = choice(list(comparisons[adj]))
        # NOTE: this overwrites ``name`` on the noun object taken from the
        # pool, so the singularised form stays on that object afterwards.
        noun.name = en.singularize(noun.name)
        # Only the first token of en.referenced() (the article) is kept.
        article = en.referenced(noun.name).split(" ")[0]
        replace_words = {'adj': adj, 'n': noun, 'det': article}

        for pos, replacement in replace_words.items():
            while pos in phrase:
                try:
                    phrase = self.replace_pos(pos, replacement, phrase)
                except Exception:
                    # Best-effort: a failed replacement yields None, but
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    return None
        # Replace by position: looking the item up again with index() is
        # quadratic and can land on a different, equal element.
        for i, w in enumerate(phrase):
            if not isinstance(w, Word):
                phrase[i] = Word(w)
        return phrase
 def generate_phrase(self, pool):
     """Build a question phrase about a random noun from *pool*.

     The grammar's start symbol is expanded with ``self.produce`` and a
     ``"?"`` token is appended.  An adjective is then drawn from
     ``pool.epithets[noun]`` and the ``adj``, ``n`` and ``be`` placeholders
     are substituted through ``self.replace_pos``.

     Returns the phrase with every item wrapped as a ``Word``, or ``None``
     when no adjective can be drawn for the noun or a replacement fails.
     """
     noun = random.choice(list(pool.nouns))
     parser = ChartParser(self.grammar)
     gr = parser.grammar()
     phrase = self.produce(gr, gr.start())
     phrase.append("?")

     try:
         adj = choice(pool.epithets[noun])
     except Exception:
         # No usable epithet for this noun: give up quietly, without
         # swallowing KeyboardInterrupt/SystemExit.
         return None
     replace_words = {'adj': adj, 'n': noun, 'be': self.conjugate("be")}
     for pos, replacement in replace_words.items():
         while pos in phrase:
             try:
                 phrase = self.replace_pos(pos, replacement, phrase)
             except Exception:
                 return None
     # Replace by position: looking the item up again with index() is
     # quadratic and can land on a different, equal element.
     for i, w in enumerate(phrase):
         if not isinstance(w, Word):
             phrase[i] = Word(w)
     return phrase
 def generate_phrase(self, pool):
     """Build a phrase about a random noun from *pool* for the current person.

     The grammar's start symbol is expanded with ``self.produce``.  A noun is
     drawn from ``pool.nouns`` and an adjective from ``pool.epithets[noun]``;
     the ``adj``, ``n``, ``be`` and ``person`` placeholders are then
     substituted through ``self.replace_pos``.

     Returns the phrase with every item wrapped as a ``Word``, or ``None``
     when a replacement fails.  The epithet lookup is not guarded, so an
     error raised there propagates to the caller.
     """
     parser = ChartParser(self.grammar)
     gr = parser.grammar()
     phrase = self.produce(gr, gr.start())
     noun = random.choice(list(pool.nouns))
     adj = choice(pool.epithets[noun])
     replace_words = {
         "adj": adj,
         "n": noun,
         "be": self.conjugate("be", self.person),
         "person": self.persons[self.person][0],
     }
     for pos, replacement in replace_words.items():
         while pos in phrase:
             try:
                 phrase = self.replace_pos(pos, replacement, phrase)
             except Exception:
                 # Best-effort: a failed replacement yields None, but
                 # KeyboardInterrupt/SystemExit are no longer swallowed.
                 return None
     # Replace by position: looking the item up again with index() is
     # quadratic and can land on a different, equal element.
     for i, w in enumerate(phrase):
         if not isinstance(w, Word):
             phrase[i] = Word(w)
     return phrase
Пример #11
0
    def generate_phrase(self, pool):
        """Build a question phrase about a random noun from *pool*.

        The grammar's start symbol is expanded with ``self.produce`` and a
        ``"?"`` token is appended.  An adjective is then drawn from
        ``pool.epithets[noun]`` and the ``adj``, ``n`` and ``be`` placeholders
        are substituted through ``self.replace_pos``.

        Returns the phrase with every item wrapped as a ``Word``, or ``None``
        when no adjective can be drawn for the noun or a replacement fails.
        """
        noun = random.choice(list(pool.nouns))
        parser = ChartParser(self.grammar)
        gr = parser.grammar()
        phrase = self.produce(gr, gr.start())
        phrase.append("?")

        try:
            adj = choice(pool.epithets[noun])
        except Exception:
            # No usable epithet for this noun: give up quietly, without
            # swallowing KeyboardInterrupt/SystemExit.
            return None
        replace_words = {'adj': adj, 'n': noun, 'be': self.conjugate("be")}
        for pos, replacement in replace_words.items():
            while pos in phrase:
                try:
                    phrase = self.replace_pos(pos, replacement, phrase)
                except Exception:
                    return None
        # Replace by position: looking the item up again with index() is
        # quadratic and can land on a different, equal element.
        for i, w in enumerate(phrase):
            if not isinstance(w, Word):
                phrase[i] = Word(w)
        return phrase
Пример #12
0
 S -> LImports LRules
 LImports -> Import LImports | 
 Import -> '@import' '"string"' ';'
 LRules -> Rule LRules | 
 Rule -> Selectors '{' LDeclaretions '}'
 LDeclaretions -> Declaration ';' MoreDeclerations
 MoreDeclerations -> LDeclaretions | 
 Selectors -> SimpleSelector MoreSelectors
 MoreSelectors -> Selectors | 
 SimpleSelector -> Astrisk SelectorModifier
 Astrisk -> '*' | 
 SelectorModifier -> '.' 'name' | ':' 'name' | '[' 'name' '=' Term ']' | '#hashid' | 'name'
 Declaration -> 'name' ':' LTerms Important
 Important -> '!ImPoRtAnT' | 
 LTerms -> Term MoreTerms
 MoreTerms -> LTerms | 
 Term -> '1337' | '15%' | '"string"' | 'name' | '#hashid'
 """)

# Wrap the grammar defined earlier in the file in a chart parser and read the
# grammar object back from it.
parser = ChartParser(grammar)
gr = parser.grammar()

# Base name shared by the generated input file and the expected-output file.
test_name = "generated"

# "<test_name>.in" receives one expansion of the grammar's start symbol, with
# the produced tokens separated by single spaces.
with open(test_name + '.in', 'w+') as writer:
    writer.write(' '.join(produce(gr, gr.start())))

# "<test_name>.out" receives str() of every item in `rules`, one per line,
# followed by a final "Success" line.
# NOTE(review): `rules` is not defined in the visible code -- presumably it is
# filled in elsewhere (e.g. while producing); confirm.
with open(test_name + '.out', 'w+') as writer:
    writer.write("\n".join(map(str, rules)))
    writer.write("\nSuccess\n")
Пример #13
0
			else:
				words.extend(produce(grammar, sym, minlen))
	return words




# Grammar of nested calls: a one-argument 'half' or a two-argument 'sum',
# whose arguments are 'a', 'b' or another call.
grammar = parse_cfg('''
F -> N1 '(' P ')' | N2 '(' P ',' P ')'
N1 -> 'half'
N2 -> 'sum'
P -> 'a' | 'b' | F
''')

# Alternative English grammar, kept as an inert string literal for reference.
'''
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
V -> 'shot' | 'killed' | 'wounded'
Det -> 'an' | 'my' 
N -> 'elephant' | 'pajamas' | 'cat' | 'dog'
P -> 'in' | 'outside'
'''

parser = ChartParser(grammar)

gr = parser.grammar()
# Print one expansion of the start symbol; the third argument to produce() is
# passed through as given (apparently a minimum length -- confirm).
# The parenthesised call prints the same text under Python 2 and is valid
# syntax under Python 3, unlike the former print statement.
print(' '.join(produce(gr, gr.start(), 3)))

Пример #14
0
def make_sentence(corpus, term_rules, *args, **kwargs):
    '''
    Generate a sentence with random structure and word choice using a
    context-free grammar.

    The syntax rules come from parsing one sentence (a random sentence of
    the corpus, or a fixed sample); the start symbol is the text preceding
    the first ' ->' of that parse. Generation is retried, with a freshly
    selected sentence, each time a ValueError is raised.

    Parameters
    ----------

    corpus : str
        a string containing the full, cleaned corpus

    term_rules : str
        a string containing all the terminal rules for the corpus

    args[0] : dict()
        Optional: a dictionary of kgrams and their subsequent words. If this
        argument is present it is passed to produce_kgram, which is then
        used instead of produce (the presence of this argument turns on the
        Markov text generation features).

    maxdepth : int
        Keyword argument, default 25. The maximum allowed recursion depth
        before throwing a ValueError

    fixed_grammar : bool
        Keyword argument, default False. Turn off the random sentence
        selection and use a fixed grammar instead.

    sample_sentence : str
        Keyword argument. When fixed_grammar is turned on, this is the
        sentence that will be parsed. This can be finicky with grammars
        containing specially punctuated constructions like quotations or
        positions

    Returns
    -------

    str
        The generated sentence, after each item of `replacements` has been
        substituted by the matching item of `to_replace`.

    Raises
    ------

    ValueError
        If no sentence could be generated within 30 attempts.

    '''

    markov_flag = bool(args)
    if markov_flag:
        kgram_dict = args[0]

    fixed_grammar = kwargs.pop('fixed_grammar', False)
    sample_sentence = kwargs.pop('sample_sentence', '')
    maxdepth = kwargs.pop('maxdepth', 25)

    if fixed_grammar and sample_sentence == '':
        warnings.warn('When using fixed_grammar, user should specify ' \
                      'the keyword argument "sample_sentence." Using a default simple sentence.')
        sample_sentence = 'The cow jumped over the moon.'

    # Loaded and applied once: neither the tokenizer nor the corpus changes
    # between attempts.
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    use_corpus = has_parser and not fixed_grammar
    corpus_sentences = tokenizer.tokenize(corpus) if use_corpus else None

    max_attempts = 30
    for _ in range(max_attempts):
        if use_corpus:
            rsent = choice(corpus_sentences)
        elif fixed_grammar:
            rsent = sample_sentence
        else:
            # no parser available: fall back to a fixed simple sentence
            print("Usage library being built")
            rsent = "The dog walked up the stairs slowly."

        parsed_syntax = parse_sentence(rsent)
        cfg_str = term_rules + parsed_syntax
        try:
            startpt = parsed_syntax[:parsed_syntax.find(' ->')]
            startpt = nltk.grammar.Nonterminal(startpt)
            grammar = CFG.fromstring(cfg_str)
            parser = ChartParser(grammar)
            gr = parser.grammar()
            if markov_flag:
                tokens = produce_kgram(gr,
                                       startpt,
                                       kgram_dict,
                                       maxdepth=maxdepth,
                                       sent=[])
            else:
                tokens = produce(gr, startpt, maxdepth=maxdepth)
            out_txt = ' '.join(tokens)
        except ValueError:
            warnings.warn(
                'Badly formed sentence encountered, resampling the corpus.')
        else:
            break
    else:
        # Every attempt failed; previously this fell through and crashed
        # with an UnboundLocalError on out_txt.
        raise ValueError(
            'Unable to generate a sentence after %d attempts.' % max_attempts)

    # now re-tag special characters
    for placeholder, original in zip(replacements, to_replace):
        out_txt = out_txt.replace(placeholder, original)

    return out_txt
Пример #15
0
    def make_sentence(self, do_markov=True, **kwargs):
        '''
        Generate a sentence with random structure and word choice using a
        context-free grammar.

        The syntax rules come from parsing one sentence (a random sentence
        of self.corpus, or a fixed sample); the start symbol is the text
        preceding the first ' ->' of that parse. Generation is retried, with
        a freshly selected sentence, each time a ValueError is raised.

        Parameters
        ----------

        do_markov : boolean that can be used to toggle the Markov
            word selection on or off. It only takes effect when
            self.order is greater than 0.

        maxdepth : int
            Keyword argument, default 25. The maximum allowed recursion
            depth before throwing a ValueError

        fixed_grammar : bool
            Keyword argument, default False. Turn off the random sentence
            selection and use a fixed grammar instead.

        sample_sentence : str
            Keyword argument. When fixed_grammar is turned on, this is the
            sentence that will be parsed. This can be finicky with grammars
            containing specially punctuated constructions like quotations or
            positions

        Returns
        -------

        str
            The generated sentence, after each item of `replacements` has
            been substituted by the matching item of `to_replace`.

        Raises
        ------

        ValueError
            If no sentence could be generated within 30 attempts.

        '''

        corpus = self.corpus
        term_rules = self.term_rules

        markov_flag = bool((self.order > 0) and do_markov)

        fixed_grammar = kwargs.pop('fixed_grammar', False)
        sample_sentence = kwargs.pop('sample_sentence', '')
        maxdepth = kwargs.pop('maxdepth', 25)

        if fixed_grammar and sample_sentence == '':
            warnings.warn('When using fixed_grammar, user should specify ' \
                          'the keyword argument "sample_sentence." Using a default simple sentence.')
            sample_sentence = 'The cow jumped over the moon.'

        # Loaded and applied once: neither the tokenizer nor the corpus
        # changes between attempts.
        tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
        use_corpus = has_parser and not fixed_grammar
        corpus_sentences = tokenizer.tokenize(corpus) if use_corpus else None

        max_attempts = 30
        for _ in range(max_attempts):
            if use_corpus:
                rsent = choice(corpus_sentences)
            elif fixed_grammar:
                rsent = sample_sentence
            else:
                # no parser available: fall back to a fixed simple sentence
                warnings.warn(
                    "Usage library being built, falling back to simple sentence"
                )
                rsent = "The dog walked up the stairs slowly."

            parsed_syntax = self.parse_sentence(rsent)
            cfg_str = term_rules + parsed_syntax
            try:
                startpt = parsed_syntax[:parsed_syntax.find(' ->')]
                startpt = nltk.grammar.Nonterminal(startpt)
                grammar = CFG.fromstring(cfg_str)
                parser = ChartParser(grammar)
                gr = parser.grammar()
                if markov_flag:
                    tokens = self.produce_kgram(gr,
                                                startpt,
                                                maxdepth=maxdepth,
                                                sent=[])
                else:
                    tokens = self.produce(gr, startpt, maxdepth=maxdepth)
                out_txt = ' '.join(tokens)
            except ValueError:
                warnings.warn(
                    'Badly formed sentence encountered, resampling the corpus.'
                )
            else:
                break
        else:
            # Every attempt failed; previously this fell through and crashed
            # with an UnboundLocalError on out_txt.
            raise ValueError(
                'Unable to generate a sentence after %d attempts.'
                % max_attempts)

        # now re-tag special characters
        for placeholder, original in zip(replacements, to_replace):
            out_txt = out_txt.replace(placeholder, original)

        return out_txt