Example #1
    def __init__(self):
        
        # comment describing what each part-of-speech tag means:
        """ CC   - conjunction: or, but, and, either
            CD   - number: one, two, three
            DT   - determiner: a, an, the, both, all, these, any, some
            EX   - the word 'there'
            IN   - preposition: in, of, with, for, under, among, upon, at
            JJ   - adjective: certain, curious, little, golden, other, offended
            JJS  - adjective: -est : best, loveliest, largest
            JJR  - adjective: -er : larger, smaller, worse
            MD   - can, dare, should, will*, might, could, must
            NN   - common singular noun
            NNS  - common plural noun
            NNP  - proper singular noun
            NNPS - proper plural noun
            PDT  - all, both, quite, many, half
            PRP  - hers, her, himself, thy, us, it, I, him, you, they
            PRPP - possessive: his, mine, our, my, her, its, your
            RB   - adverb: very, not, here, there, first, just, down, again, beautifully, -ly
            RBR  - more
            RBS  - adverb superlative: -est
            RP   - participle: up, down, out, away, over, off
            TO   - the word 'to'
            UH   - interjection
            VB   - vocative verb: to ___ 
            VBD  - past verb: -ed : was*(freq. occur), had, dipped, were, said, seemed
            VBG  - present verb: -ing: trembling, trying, getting, running, swimming
            VBN  - past verb descriptive: crowded, mutated, fallen, lit, lost, forgotten
            VBP  - present verb: not -s: am, wish, make, know, do, find
            VBZ  - present verb: -s : is*, has, seems
            WDT  - what, which, that*
            WP   - who, what
            WRB  - how, whenever, where, why, when
        """

        # create base of cfg
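        # NB: several alternatives below are repeated verbatim (e.g. S -> NPS VPS);
        # with random generation this presumably biases selection toward those expansions.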
        g = CFG.fromstring("""
            S -> NPS VPS | NPS VPS | NPS VPS | NPP VPP | VPO | NPO
            S -> NPS VPS | NPP VPP | NPS VPS

            NPS -> 'DT' 'NN' | 'DT' 'NN' | 'DT' 'JJ' 'NN' | 'DT' 'JJ' 'NN'
            NPS -> 'EX' 'the' 'NN' | 'the' 'JJS' 'NN'
            NPS -> 'she' | 'he' | 'it' | 'I'
            NPS -> NPS INP | INP NPS

            NPP -> 'the' 'NNS' | 'the' 'NNS' | 'NNS'
            NPP -> 'the' 'JJ' 'NNS'
            NPP -> 'they' | 'you' | 'we'

            VING -> 'VBG' | 'VBG' | 'RB' 'VBG'
            VBB -> 'VB' | 'VB' | 'VBP' 

            SM -> 'is' | 'was' | 'has been'

            VPS -> SM 'VBN' | SM 'VBN' 'like the' 'JJ' 'NN'
            VPS -> SM VING | SM VING INP
            VPS -> SM VING 'like' 'DT' 'JJ' 'NN'
            VPS -> SM VING 'like a' 'NN' INP
            VPS -> SM 'as' 'JJ' 'as' 'JJ'
            VPS -> SM 'a' 'JJ' 'NN'
            VPS -> SM 'a' 'NN' INP
            VPS -> 'MD' 'have been' VING
            VPS -> 'is' 'JJ' 'and' 'JJ'
            VPS -> 'VBD' INP | 'RB' 'VBD'
            VPS -> SM 'VBD' 'like' 'DT' 'JJ' 'NN'
            VPS -> SM 'as' 'JJ' 'as the' 'NN'
            VPS -> 'VBD' 'NN' | 'VBD' 'DT' 'NN'
            VPS -> 'VBD' 'and' 'VBD' INP 'until' 'VBN'
            VPS -> VPS 'and' S
            VPS -> 'VBD' 'JJR' 'than' 'a' 'NN'
            VPS -> 'VBD' 'EX'
            VPS -> SM 'JJ' | SM 'VB' INP

            NPO -> 'a' 'NN' 'IN' 'NNP'
            NPO -> 'the' 'NN' 'IN' 'the' 'JJ' 'NNP'
            NPO -> 'the' 'NNS' 'IN' 'the' 'NN'

            VPO -> 'VBG' 'like' 'NNP' 'RP' 'DT' 'JJ' 'NN' 'IN' 'DT' 'NN'
            VPO -> 'has been' 'VBG' 'RP' 'and' 'VBG'
            
            PM -> 'are' | 'were' | 'have been'

            VPP -> PM VING | PM VING INP
            VPP -> PM VING 'like the' 'NNS' INP
            VPP -> PM 'as' 'JJ' 'as' NPS INP | PM 'JJ' 'like' 'NNS' | PM 'JJ' 'like' VBG 'NNS'
            VPP -> PM 'VBN' | PM 'VBN' INP
            VPP -> PM 'as' 'JJ' 'as' 'JJ' | PM 'as' 'JJ' 'as' 'VBG' 'NNS'
            VPP -> PM 'NNS' INP
            VPP -> PM 'JJ' 'NNS'
            VPP -> 'are' 'JJ' 'and' 'JJ'
            VPP -> 'VBD' INP | 'VBD' 'RP' INP
            VPP -> PM 'JJ' | PM 'VB' INP
            
            INP -> 'IN' 'DT' 'NN' | 'IN' 'the' 'NNS' | 'IN' 'the' 'JJ' 'NNS'
            INP -> 'IN' 'DT' 'NN' 'IN' 'DT' 'NN'
            INP -> 'IN' 'DT' 'JJ' 'NN' | 'RP' 'IN' 'DT' 'JJ' 'NN'
            INP -> 'RP' 'IN' 'DT' 'NN' | 'IN' 'JJ' 'NNS'
            INP -> 'IN' 'DT' 'NN' | 'RP' 'DT' 'NNS'
            """)

        # save grammar to self.cfg
        self.cfg = CFG.fromstring(str(g).split('\n')[1:])
        self.cfg._start = g.start()       
Example #2
def verifygrammar(label, codestring, varname):
    regexp_tagger = RegexpTagger([
        (r"^[0-9]+$", "decimal"),
        (r"^0x[0-9A-Fa-f]+$", "hexadecimal"),
    ])
    # VARIABLE LINE GENERATION - Assumption - Complex numbers data types are ignored for data mining algorithms
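    # note (assumption about intent): in NLTK grammars, unquoted symbols such as
    # True, 0-9 and a-z below are read as nonterminals; quote them (e.g. '0', 'a')
    # if they are meant to be terminals. Also, 'STI -> S N | S C' likely means
    # 'STI -> STI N | STI C'.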
    if label.tag == 'var':
        varGrammar = CFG.fromstring("""
            S -> VN "=" VV
            VN -> """ + varname + """
            VV -> I | D | ST | B
            B -> True | False
            I -> I N | N
            D -> I"."F
            F -> F N | N
            ST -> "'"STI"'"
            STI -> S N | S C | N | C
            N -> 0|1|2|3|4|5|6|7|8|9
            C -> a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z
            """)
    elif label.tag == 'array':
        arrayGrammar = CFG.fromstring("""
            S -> AN "= [" AE "]"
            AN -> """ + varname + """
            AE -> VV AE | VV
            VV -> I | D | ST | B
            B -> True | False
            I -> I N | N
            D -> I"."F
            F -> F N | N
            ST -> "'"STI"'"
            STI -> S N | S C | N | C
            N -> 0|1|2|3|4|5|6|7|8|9
            C -> a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z
            """)
def train():
    print("Collecting sub-corpus from Penn Treebank (nltk.corpus)")
    
    # prepare parse trees extracted from the treebank
    tbank_trees = []
    for sent in treebank.parsed_sents():
        sent.chomsky_normal_form()
        tbank_trees.append(sent)
    
    # build vocabulary list, extracted from treebank
    vocab_size = 10000 # set vocabulary size to 10000
    words = [wrd.lower() for wrd in treebank.words()]
    vocab = [wrd for wrd, freq in Counter(words).most_common(vocab_size)]
    
    # generate the grammar rule list extracted from the treebank, and calculate each rule's probability from its frequency
    tbank_productions = set(production for tree in tbank_trees for production in tree.productions())
    tbank_grammar = CFG(Nonterminal('S'), list(tbank_productions))
    production_rules = tbank_grammar.productions()
    rules_to_prob = defaultdict(int)
    nonterm_occurrence = defaultdict(int)
    
    # calculate relative-frequency (MLE) probabilities: count(rule) / count(rule's LHS)
    for sent in tbank_trees:
        for production in sent.productions():
            if len(production.rhs()) == 1 and not isinstance(production.rhs()[0], Nonterminal):
                production = Production(production.lhs(), [production.rhs()[0].lower()])
            nonterm_occurrence[production.lhs()] += 1
            rules_to_prob[production] += 1
    for rule in rules_to_prob:
        rules_to_prob[rule] /= nonterm_occurrence[rule.lhs()]

    # use Katz smoothing
    rules_to_prob, vocab = katz_smooth(rules_to_prob, vocab)
    rules = list(rules_to_prob.keys())
    rules_reverse_dict = dict((j,i) for i, j in enumerate(rules))
    left_rules = defaultdict(set)
    right_rules = defaultdict(set)
    unary_rules = defaultdict(set)
    
    # classify left, right rules
    for rule in rules:
        if len(rule.rhs()) > 1:
            left_rules[rule.rhs()[0]].add(rule)
            right_rules[rule.rhs()[1]].add(rule)
        else:
            unary_rules[rule.rhs()[0]].add(rule)
    terminal_nonterms_rules = set(rule for rule in rules_to_prob if len(rule.rhs()) == 1 and isinstance(rule.rhs()[0], str))
    terminal_nonterms = defaultdict(int)
    for rule in terminal_nonterms_rules:
        terminal_nonterms[rule.lhs()] += 1
    pcfg_parser = {
        'vocab': vocab,
        'left_rules': left_rules,
        'right_rules': right_rules,
        'unary_rules': unary_rules,
        'rules_to_prob': rules_to_prob,
        'terminal_nonterms': terminal_nonterms
    }
    return pcfg_parser
Example #4
def respondQuestion(sentence, keyWord, POS):
	if "Tell me" not in sentence:
		grammar = ""

		if POS == "NNPS" or POS == "NNS":
			grammar = CFG.fromstring("""
			S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
			H-NP1 -> 'How'
			Wh-NP -> 'Who' | 'What' | 'Where'
			Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
			NP -> Pronoun | Proper-Noun | Noun
			Pronoun -> 'they' | 'those'
			Proper-Noun -> '[]'
			Noun -> 'the <>'
			VP -> Verb NP  
			Verb -> 'are' 
			""")
		elif POS == "NN" or "NNP":
			grammar = CFG.fromstring("""
			S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
			H-NP1 -> 'How'
			Wh-NP -> 'Who' | 'What' | 'Where'
			Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
			NP -> Pronoun | Proper-Noun | Noun
			Pronoun -> 'it' | 'that'
			Proper-Noun -> '[]'
			Noun -> 'the <>'
			VP -> Verb NP  
			Verb -> 'is' 
			""")

		rand_sent_list = []
		response = ""
		for sentence in generate(grammar):
			rand_sent_list.append(' '.join(sentence))
		while True:
			num = randint(0, len(rand_sent_list)-1)
			response = rand_sent_list[num]
			if "<>" in response and (POS == "NNS" or POS == "NN"):
				index = response.index("<>")
				response = response[:index] + keyWord + response[index+2:]
				break
			if "[]" in response and (POS == "NNPS" or POS == "NNP"):
				index = response.index("[]")
				response = response[:index] + keyWord + response[index+2:]
				break
			if "<>" not in response and "[]" not in response:
				break
		return response
	else:
		knowledgeRep(sentence)
Example #5
def generate_pairs(depth, cfg):
    '''
    depth: integer depth of the parse tree in the CFG
    cfg: chosen grammar, 1, 2 or 3
    '''
    if (cfg == 1):
        grammar = CFG.fromstring("""
        S -> Y  
        Y ->   a Y b | a Y | a |
        a -> '(' ')'  
        b -> '{' '}'  
        """)
    elif cfg == 2:
        grammar = CFG.fromstring("""
        S ->  X | Y  | X Y
        X -> a
        Y ->  b
        a -> '(' a ')'  |  
        b -> '{' b '}'  | 
        """)
    elif cfg == 3:
        grammar = CFG.fromstring("""
        S ->  X 
        X -> a | b
        a -> '(' a ')'  |  
        b -> '{' b '}' | '{' a '}'
        """)
    trg = list(generate(grammar, depth=depth))
    trg_list = []
    for sentence in trg:
        k = ''.join(sentence)
        trg_list.append(k)

    src_list = trg2src(trg)

    if cfg == 1:
        A = list((s + 'A ' for s in src_list))
    elif cfg == 2:
        A = list((s + 'B ' for s in src_list))
    elif cfg == 3:
        A = list((s + 'C ' for s in src_list))
    else:
        raise ValueError('cfg must be 1, 2 or 3')

    B = list(trg_list)

    df = pd.concat([pd.Series(A), pd.Series(B)], axis=1)
    pairs = (df.iloc[:, 0] + df.iloc[:, 1]).values.tolist()
    return pairs
Example #6
def gen_grammar_plural(verb, direct_object, count):
    try:
        verb = en.verb.present_participle(verb)
    except KeyError:
        return
    if verb != "":
        g1 = """
		S -> WA TR SUB V DO '?' | W TR SUB V '?' 
		W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
		WA -> 'when' | 'where' | 'why' | 'how'
		TR -> 'are' | 'were'
		SUB -> 'they' | 'you'
		V -> '%s'
		DO -> 'the %s'
		""" % (verb, direct_object)
        grammar1 = CFG.fromstring(g1)
        multiplier = 1
        with open('sentences.csv', 'ab') as csvwriter:
            writer = csv.writer(csvwriter)
            for sentence in generate(grammar1, n=999):
                sentence = ' '.join(sentence)
                if sentence.find('who') == 0:
                    multiplier = 1
                if sentence.find('what') == 0:
                    multiplier = 1
                if sentence.find('when') == 0:
                    multiplier = 2
                if sentence.find('where') == 0:
                    multiplier = 2
                if sentence.find('why') == 0:
                    multiplier = 4
                if sentence.find('how') == 0:
                    multiplier = 4
                writer.writerow((sentence, multiplier * count))  # sentence is already joined above
Example #7
def grammar_extraction(population_g, initial_state, subs):

    population_s = {}

    for pop in population_g:
        p = [initial_state]

        for n in population_g[pop]:
            # keep only the non-zero entries
            if n != 0:
                p.append(n)

        separ = "\n"
        prime_grammar = separ.join(p)
        pre_grammar = prime_grammar.format(subs=subs)

        pos_grammar = """
    {seed}
    """.format(seed=prime_grammar)

        post_grammar = """
    {seed}
    """.format(seed=pre_grammar)
        grammar_use = CFG.fromstring(post_grammar)

        population_s[pop] = (grammar_use, pos_grammar)

    return population_s
Example #8
def main():
    parser = argparse.ArgumentParser(description='CKY and PCKY')
    parser.add_argument('-g',
                        '--grammar',
                        help='Input file name',
                        required=True)
    parser.add_argument('-s',
                        '--sentence',
                        help='Input sentence',
                        required=True)
    args = parser.parse_args()

    grammar_text = None
    with open(args.grammar, 'r') as f:
        grammar_text = f.read()

    grammar = None
    result = None
    try:
        grammar = CFG.fromstring(grammar_text)
    except ValueError:
        grammar = PCFG.fromstring(grammar_text)

    if type(grammar) is CFG:
        result = cky(args.sentence, grammar)
    elif type(grammar) is PCFG:
        result = pcky(args.sentence, grammar)
Example #9
def gen_grammar3_past_plural(verb, direct_object, count):
    g1 = """
	S -> W TR SUB V '?' | WA TR SUB V DO '?' 
	W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
	WA -> 'when' | 'where' | 'why' | 'how'
	TR -> 'have'
	SUB -> PRO
	PRO -> 'they' |'you'
	V -> '%s'
	DO -> 'the %s'
	""" % (verb, direct_object)
    grammar1 = CFG.fromstring(g1)
    multiplier = 0
    with open('sentences.csv', 'ab') as csvwriter:
        writer = csv.writer(csvwriter)
        for sentence in generate(grammar1, n=999):
            sentence = ' '.join(sentence)  # generate() yields token lists, so join before matching
            if sentence.find('who') == 0:
                multiplier = 1
            if sentence.find('what') == 0:
                multiplier = 1
            if sentence.find('when') == 0:
                multiplier = 2
            if sentence.find('where') == 0:
                multiplier = 2
            if sentence.find('why') == 0:
                multiplier = 4
            if sentence.find('how') == 0:
                multiplier = 4
            writer.writerow((sentence, multiplier * count))
Example #10
def rand_sentences(n=10, depth=6, wpt=0.25):
    #grammar = CFG.fromstring(open('assets/text/grammar.txt', 'r').read())
    grammar = CFG.fromstring(rand_vocabulary(wpt))
    sentences = list(generate(grammar, n=n * 20, depth=depth))
    return [
        ' '.join(i) for i in random.sample(sentences, min(n, len(sentences)))
    ]
    def __init__(self,
                 cfg_grammar=None,
                 origin_file='save/origin.txt',
                 oracle_file='save/oracle.txt',
                 wi_dict='save/word_index_dict.json',
                 iw_dict='save/index_word_dict.json',
                 sequence_length=None,
                 generate_from_scratch=False):
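        # fall back to a small arithmetic-expression grammar over 'x' and 'y'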
        if cfg_grammar is None:
            cfg_grammar = """
              S -> S PLUS x | S SUB x |  S PROD x | S DIV x | x | '(' S ')'
              PLUS -> '+'
              SUB -> '-'
              PROD -> '*'
              DIV -> '/'
              x -> 'x' | 'y'
            """

        self.grammar = CFG.fromstring(cfg_grammar)
        self.origin_file = origin_file
        self.oracle_file = oracle_file
        self.wi_dict = wi_dict
        self.iw_dict = iw_dict
        self.sequence_length = sequence_length
        self.generate_from_scratch = generate_from_scratch
        self.vocab_size = None
        import os, inspect
        self.saving_path = os.path.dirname(
            os.path.abspath(inspect.getfile(
                inspect.currentframe()))) + '/save/'
        return
Example #12
 def __init__(self, blackboard):
     super(SentenceExpert, self).__init__(blackboard, "Sentence Expert")
     self.eva = ["be", "look", "feel"]
     self.atv = ["like", "hate", "love", "know", "need", "see"]
     """ eva - emotional verb active
         evp - emotional verb passive
         ej - emotion adjective
         en - emotional noun
         atv - attitude verb
     """
     self.grammar = CFG.fromstring("""
         S -> P | EP | Person ATV NP
         P -> NP VP
         EP -> Person EVA EJ | NP EVP Pron EJ | ENP VP
         ENP ->  EN OF NP
         NP -> Det N | Det JJ N | Det EJ JJ N | Det EJ N | Det EN
         VP -> V | V ERB | ERB V
         Det -> 'the'
         N -> 'n'
         V -> 'v'
         EVA -> 'eva'
         EVP -> 'makes'
         EN -> 'en'
         EJ -> 'ej'
         JJ -> 'adj'
         ERB -> 'erb'
         ATV -> 'atv'
         Person -> 'person'
         Pron -> 'pron'
         OF -> 'of'
         CC -> 'and' | 'but' | 'because' | 'so'
         """)
Example #13
def get_pos_tags(pos_tuples):
    """
    Returns the POS tags from POS tuples of (word, tag)
    Updates the grammar for unknown tags
    """

    global grammar_string
    global grammar
    global terminals

    changed_grammar = False
    pos_tags = []

    for pos_tuple in pos_tuples:
        tag = pos_tuple[1]

        if tag not in terminals:

            if tag == '\'\'':
                tag = 'APOS'

            grammar_string += ' | \'' + tag + '\''

            terminals[tag] = None
            changed_grammar = True

        pos_tags.append(tag)

    if changed_grammar:
        grammar = CFG.fromstring(grammar_string)

    return pos_tags
Example #14
def execute(text: str):
    groucho_grammer = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
    """)
    parser = ChartParser(groucho_grammer)

    tokens = word_tokenize(text=SAMPLE_3)
    print(type(tokens))
    print(tokens)
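    # note: the SAMPLE_3 tokens above are printed but not parsed; the hard-coded
    # token list below is not covered by groucho_grammer's vocabulary, so
    # parser.parse() raises a ValueError about missing words.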
    for tree in parser.parse(tokens=[
            'The',
            'little',
            'bear',
            'saw',
            'the',
            'fine',
            'fat',
            'trout',
            'in',
            'the',
            'brook',
    ]):
        print(tree)
Example #17
    def __init__(self, phonemes=None, onset=None, coda=None):
        self.phonemes = phonemes or Phoneme()

        # use CFG to structure syllables
        if onset is None: # optional onset
            onset = 'C | C C | \' \''
        elif onset: # mandatory onset
            onset = 'C | C C'
        else: # no onset
            onset = '\' \''

        if coda is None: # optional coda
            coda = 'C | \' \''
        elif coda: # mandatory coda
            coda = 'C'
        else: # no coda
            coda = '\' \''
        # nucleus is always present

        # based on the "typical model"
        grammar = '''
        S -> O V K
        O -> %s
        K -> %s
        C -> \'c\'
        V -> \'v\'
        ''' % (onset, coda)
        self.grammar = CFG.fromstring(grammar)
        self.syllables = self.generate_syllables()
Example #18
def Tweet_content1():
  grammar = CFG.fromstring(demo_grammar)

  # n=4 caps the number of generated sentences (not their length or depth)
  for sentence in generate(grammar, n=4):
    print(' '.join(sentence))

  return sentence
Example #19
def draw_1(s):
    m = s
    l = fool.cut(s)[0]
    print(l)
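    # product_grammar(m) presumably builds extra terminal productions for the
    # tokens of m (assumption; it is defined elsewhere)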
    p = product_grammar(m)
    grammar = CFG.fromstring("""
    S ->NP V NP U L|NP U NP V L| NP U L V NP|L U NP V NP|L V NP U NP|NP V L U NP
    NP -> N N|r NP|NP A NP|M Q NP|N|NP U NP|A U NP|N NP|NP C NP|NP U|M NP
    VP ->V|V NP|V VP|A VP|VP NP|VP U|VP C VP|VP P|VP uguo
    V -> v|vi|vshi
    N ->n|nr|t|ns|f|nx|nz
    R ->r
    C ->c
    P ->p
    L ->R|R NP
    U ->ude|y
    A ->a|d|ad
    M ->m
    Q ->q
    """ + p)
    cp = nltk.ChartParser(grammar)
    tree = cp.parse(l)
    stree = []
    for s in tree:
        st = []
        #s.draw()
        for i in range(len(s)):
            st.append([s[i].label(), ''.join(s[i].leaves())])
        stree.append(st)
    return stree
Example #20
    def generate(self,
                 n=10,
                 verb='intransitive',
                 rc='none',
                 pp='none',
                 ident=False):
        """
            Generate input-output pairs with the main auxiliary in the given 
            language. Arguments specify whether the verb should be transitive 
            or intransitive, the position of the relative clause, and the 
            position of the prepositional phrase.
            The vocabulary used in this function is a random sample (class-wise)
            of the entire vocabulary, to allow for generating sentences in a
            reasonable amount of time.

            Args:
                n: integer number of pairs to be generated
                verb: 'transitive' or 'intransitive', type of verb
                rc: 'none', 'subject', or 'object', position of relative clause
                pp: 'none', 'subject', or 'object', position of prepositional
                    phrase
                ident: boolean indicating whether output is identical sentence 
                    or question
            
            Return:
                list of tuples (input, output, main_aux)
        """

        grammar = CFG.fromstring(self.get_grammar_string(verb, rc, pp))

        sentences = list()
        for sentence in generate_from_cfg(grammar, n=n):
            sentences.append(Language.transform(sentence, ident))
        return sentences
def demo():
    """
    A demonstration of the recursive descent parser.
    """

    from nltk import parse, CFG

    grammar = CFG.fromstring("""
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """)

    for prod in grammar.productions():
        print(prod)

    sent = 'I saw a man in the park'.split()
    parser = parse.RecursiveDescentParser(grammar, trace=2)
    for p in parser.parse(sent):
        print(p)
Example #22
def demo():
    """
    A demonstration of the shift-reduce parser.
    """

    from nltk import parse, CFG

    grammar = CFG.fromstring(
        """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    )

    sent = 'I saw a man in the park'.split()

    parser = parse.ShiftReduceParser(grammar, trace=2)
    for p in parser.parse(sent):
        print(p)
Example #23
 def setUp(self):
     if not exists(self.LEXICON_FILE_NAME):
         self.skipTest("Unable to find file {} as lexicon".format(
             self.LEXICON_FILE_NAME))
     if not exists(self.GRAMMAR_FILE_NAME):
         self.skipTest("Unable to find file {} as grammar".format(
             self.GRAMMAR_FILE_NAME))
     assert exists(self.PARSE_TREES_FILE_NAME)
     
     valid,lexiconText = q1utils.sanitizeAndValidateLexicon(
         self.LEXICON_FILE_NAME)
     if not valid:
         self.skipTest("Lexicon {} is invalid.".format(
             self.LEXICON_FILE_NAME))
     
     valid,grammarText = q1utils.sanitizeAndValidateGrammar(
         self.GRAMMAR_FILE_NAME)
     if not valid:
         self.skipTest("Grammar {} is invalid.".format(
             self.GRAMMAR_FILE_NAME))
     
     allRules = grammarText + '\n' + lexiconText
     
     try:
         grammar = CFG.fromstring(allRules)
         self._parser = BottomUpChartParser(grammar)
     except Exception as e:
         self.skipTest(str(e))
Example #24
def restore(sents, mint=None, maxt=None, minh=None, maxh=None):
    """Get best infered grammar

    Parameters
    ----------
    sents: collection of str
        sentences to use in restoration
    mint: int
        check up values of t starting from this value

    maxt: int
        check up values of t up to this value

    minh: int
        check up values of h starting from this value

    maxh: int
        check up values of h up to this value

    Returns
    -------
    grammar : nltk.CFG
    """
    res = restore_all(sents, mint, maxt, minh, maxh)
    simplest = min(res.values(), key=cmp_to_key(_cmp_grammar_simplicity))

    return CFG.fromstring(simplest)
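# hypothetical usage sketch: pick the simplest inferred grammar that covers both
# sentences, trying values of t in [1, 3]:
#   g = restore(['ab', 'aabb'], mint=1, maxt=3)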
Example #25
    def __init__(self,
                 cfg_grammar=None,
                 origin_file='save/origin.txt',
                 oracle_file='save/oracle.txt',
                 wi_dict='save/word_index_dict.json',
                 iw_dict='save/index_word_dict.json',
                 sequence_length=None):
        if cfg_grammar is None:
            cfg_grammar = """
              S -> S PLUS x | S SUB x |  S PROD x | S DIV x | x | '(' S ')'
              PLUS -> '+'
              SUB -> '-'
              PROD -> '*'
              DIV -> '/'
              x -> 'x' | 'y'
            """

        self.grammar = CFG.fromstring(cfg_grammar)
        self.origin_file = origin_file
        self.oracle_file = oracle_file
        self.wi_dict = wi_dict
        self.iw_dict = iw_dict
        self.sequence_length = sequence_length
        self.vocab_size = None
        return
Example #26
def do_grammar_tests():
    from nltk import CFG
    grammar_files = [
        'grammar-mpropp.txt', 'grammar-mpropp2.txt', 'grammar-lakoff.txt',
        'grammar-gervas.txt', 'grammar-finlayson.txt'
    ]
    grammar_test = [
        i.split() for i in open('data/grammar-test-filtered.txt').readlines()
    ]
    for i in grammar_files:
        grammar_file = 'data/' + i
        print grammar_file, '\t',
        g = CFG.fromstring(open(grammar_file).read())
        #pprint.pprint(g.productions())
        coverage = True
        for i, tokens in enumerate(grammar_test):
            try:
                g.check_coverage(tokens)
                print 1,
            except Exception as e:
                print 0,  #,e
                coverage = False
        print
        #rdp = nltk.RecursiveDescentParser(g)
        #srp = nltk.ShiftReduceParser(g)
        #bulccp = nltk.BottomUpLeftCornerChartParser(g)
        if coverage:
            for i, tokens in enumerate(grammar_test):
                pass
Example #27
def perform_function(sentence):
    # print(sentence)
    output = ""
    g_string = (" SIGMA -> DELTA\n"
                " DELTA -> S P C|S P C A|S P A | S P \n"
                " A -> Pre Comp \n"
                " S -> h |m h\n"
                " C -> m h|h\n"
                " P -> n l|aux l| l \n"
                " m -> d e| d\n"
                " h -> " + name_string + "\n"
                " l -> 'boarded'|'cooked'|'climbed'|'bought'|'gave'\n"
                " Pre -> 'ni'\n"
                " e -> 'black'\n"
                " d -> 'the'|'The'\n"
                " aux -> 'n'")
    gramma = CFG.fromstring(g_string)
    parser = nltk.ChartParser(gramma)
    try:
        ans = parser.parse(sentence.split())
        output = " ".join(str(x) for x in list(ans))
    except ValueError as e:
        # print("error : " + str(e))
        output = "Error : " + str(e)
    return output
Example #28
def generate_sources_grammar(attribute, parent, phase):
    gr = [
        Production(Nonterminal('S'), (Nonterminal('AUX1'), )),
        Production(Nonterminal('AUX1'), ('Do', Nonterminal('S1'))),
        Production(Nonterminal('S1'), ('you', Nonterminal('V1'))),
        Production(Nonterminal('V1'), ('think', Nonterminal('ART'))),
        Production(Nonterminal('ATTR'), (attribute, Nonterminal('END'))),
        Production(Nonterminal('END'), ('?', ))
    ]
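    # base productions; the ART/CLS/PAR and V2 rules appended below vary with
    # parent and phase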
    if phase == 1:
        v2 = Production(Nonterminal('V2'), ('included', Nonterminal('ATTR')))
    else:
        v2 = Production(Nonterminal('V2'),
                        ("didn't include", Nonterminal('ATTR')))
    if parent is None:
        article = Production(Nonterminal('ART'), ('the', Nonterminal('CLS')))
        parent = Production(Nonterminal('CLS'), ('sources', Nonterminal('V2')))
    else:
        article = Production(Nonterminal('ART'), ('the', Nonterminal('PAR')))
        parent = Production(Nonterminal('PAR'), (parent, Nonterminal('V2')))
    gr.append(v2)
    gr.append(article)
    gr.append(parent)
    grammar = CFG(Nonterminal('S'), gr)
    return grammar
Example #29
def context_free_grammar():
    cfg = CFG.fromstring("""\
    ################# Rules #################
    S -> NP VP
    S -> PP NP VP
    S -> Wh Aux NP VP 
    NP -> ProperNoun | CC ProperNoun | N | ProperNoun NP | AP N | DET NP | N PP    
    VP -> V | V NP | Adv VP | V NP VP
    AP -> Adj | Adj AP
    PP -> P NP | P NP VP
    
    ################# Lexicons ################# 
    N -> 'milk'| 'shoes' | 'salad' | 'kitchen' | 'midnight' | 'table'
    V -> 'laughs' | 'laughed' | 'drink' | 'wears' | 'serves' | 'drinks' | 'thinks' | 'wear'
    ProperNoun -> 'Bart' | 'Homer' | 'Lisa'
    Aux -> 'do' | 'does'
    CC -> 'and'
    Adj -> 'blue' | 'healthy' | 'green' 
    DET -> 'a' | 'the' 
    Adv -> 'always' | 'never' 
    P -> 'in' | 'before' | 'on' | 'when'
    Wh -> 'when'
    """)
    cfparser = ChartParser(cfg)
    sents = text.splitlines()
    for sent in sents:
        parses = cfparser.parse(sent.split())
        print(sent)
        for tree in parses:
            print(tree)
Example #30
def generate_events_grammar(attribute, parent, phase):
    gr = [
        Production(Nonterminal('S'), (Nonterminal('AUX1'), )),
        Production(Nonterminal('AUX1'), ('Do', Nonterminal('S1'))),
        Production(Nonterminal('S1'), ('you', Nonterminal('V1'))),
        Production(Nonterminal('V1'), ('think', Nonterminal('ART'))),
        Production(Nonterminal('ATTR'), (attribute, Nonterminal('END'))),
        Production(Nonterminal('END'), ('?', ))
    ]
    if parent is not None:
        art = Production(Nonterminal('ART'), ('the', Nonterminal('PAR')))
        par = Production(Nonterminal('PAR'), (parent, Nonterminal('V2')))
    else:
        art = Production(Nonterminal('ART'), ('the', Nonterminal('PAR')))
        par = Production(
            Nonterminal('PAR'),
            ('events that caused the incident', Nonterminal('V2')))
    if phase == 1:
        v2 = Production(Nonterminal('V2'), ('included', Nonterminal('ATTR')))
    else:
        v2 = Production(Nonterminal('V2'),
                        ('did not include', Nonterminal('ATTR')))
    gr.append(art)
    gr.append(par)
    gr.append(v2)
    grammar = CFG(Nonterminal('S'), gr)
    return grammar
Example #31
def demo():
    """
    A demonstration of the recursive descent parser.
    """

    from nltk import parse, CFG

    grammar = CFG.fromstring(
        """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    )

    for prod in grammar.productions():
        print(prod)

    sent = "I saw a man in the park".split()
    parser = parse.RecursiveDescentParser(grammar, trace=2)
    for p in parser.parse(sent):
        print(p)
Example #32
def main():
    source = "./grammar.cfg"

    sentences = [
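        # each inline comment gives the expected Yoda-order (OSV) rendering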
        "skywalker sarà tuo apprendista",  #tuo apprendista skywalker sarà
        "tu avrai novecento anni di età",  # novecento anni di età tu avrai
        "tu hai amici lì",  # amici lì tu hai
        "noi siamo illuminati",  # illuminati noi siamo
        "il lato oscuro è arduo da vedere",  # arduo da vedere il lato oscuro è
        "tu hai molto da apprendere ancora",  # molto da apprendere ancora tu hai
        "skywalker corre veloce",  # veloce Skywalker corre
        "il futuro di questo ragazzo è nebuloso"
    ]  # nebuloso il futuro di questo ragazzo è

    with open(source, encoding='utf-8') as file:
        grammar = CFG.fromstring(file.read())
        #print(grammar)

    i = 0
    if grammar.is_chomsky_normal_form():
        for sent in sentences:
            it_tree = cky(sent.split(), grammar)
            save_tree("it" + str(i), it_tree)
            it_tree.draw()
            if (it_tree is not None):
                yoda_tree = translate_it_yo(it_tree)
                save_tree("yo" + str(i), yoda_tree)
                yoda_tree.draw()
            i += 1
    else:
        exit('Error: the grammar must be in Chomsky Normal Form')
Example #33
def get_parser_for_grammar(input_code='program.gir', grammar_name='grammar'):
    terminal_rules = get_terminal_rules(read_lines(input_code))

    with open(grammar_name, 'r') as f:
        # drop comment lines; readlines() already keeps each line's newline
        lines = ''.join(x for x in f.readlines() if not x.startswith('#'))
        lines = lines + '\n' + '\n'.join(terminal_rules)
        return nltk.ChartParser(CFG.fromstring(lines))
Example #34
def demo():
    """
    A demonstration of the shift-reduce parser.
    """

    from nltk import parse, CFG

    grammar = CFG.fromstring(
        """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    )

    sent = "I saw a man in the park".split()

    parser = parse.ShiftReduceParser(grammar, trace=2)
    for p in parser.parse(sent):
        print(p)
Example #35
 def generate_from_grammar(self, n, depth):
     grammar = CFG.fromstring(self.gramma)
     print("Generuje dla n " + n + " i depth " + depth)
     for track in generate(grammar, n=int(n), depth=int(depth)):
         self.track_array.append(' '.join(track))
         # productions
         numbers = " ".join(track)
         self.productions.append(numbers)
Example #36
def restore_all(sents, mint=None, maxt=None, minh=None, maxh=None):
    """Get all infered grammars

    For each combination of parameters `t` and `h` there may be a different grammar

    Grammar syntax example:

    S -> 'c' A 'a' B | 'b'

    A -> 'a' A | 'A'

    B -> 'b' A

    Parameters
    ----------
    sents: collection of str
        sentences to use in restoration
    mint: int
        check up values of t starting from this value

    maxt: int
        check up values of t up to this value

    minh: int
        check up values of h starting from this value

    maxh: int
        check up values of h up to this value

    Returns
    -------
    grammars : dict of str
        grammar strings for every valid pair of t and h
    """
    maxlen = len(max(sents, key=len))
    mint = mint if mint is not None else 1
    minh = minh if minh is not None else 1

    maxt = maxt if maxt is not None else maxlen
    maxh = maxh if maxh is not None else maxlen

    res = {}
    for t, h in itertools.product(range(mint, maxt + 1), range(minh, maxh + 1)):
        p = Pnet(sents)
        p = net_transform(p, t, h)
        _, g_str = net_to_grammar(p, t)

        g = CFG.fromstring(g_str)

        if all(check_grammar(g, s) for s in sents):
            print(f'Success with t={t}, h={h}')
            print(g_str, '\n')
            res[(t, h)] = g_str
        else:
            print(f'Fail with t={t}, h={h}')

    return res
Example #37
def generate_name(G):
    grammar = CFG.fromstring(G)

    parser = ChartParser(grammar)

    gr = parser.grammar()
    tokens = produce(gr, gr.start())
    name = ''.join(tokens)
    return name.title()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--upper',
                        type=int,
                        required=True,
                        help='Model size upper bound')
    parser.add_argument('-d',
                        '--depth',
                        type=int,
                        required=True,
                        help='Maximum CFG production depth considered')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='Show progress and timing')
    parser.add_argument('-i',
                        '--input',
                        type=str,
                        required=True,
                        help='Path to text file containing CFG specification')
    parser.add_argument('-e',
                        '--expr',
                        type=str,
                        required=True,
                        help='Path quantifier expressions should'
                        ' be saved in')
    parser.add_argument('-b',
                        '--bit',
                        type=str,
                        required=True,
                        help='Path quantifier bitstrings should be saved in')

    args = parser.parse_args()

    upper = args.upper
    max_depth = args.depth
    verbose = args.verbose
    in_file = args.input
    expr_file = args.expr
    bit_file = args.bit

    with open(in_file, 'r') as f:
        grammar_str = f.read()

        # NLTK does not like unnecessary indentation
        pattern = re.compile(r'\n\s+\|')
        grammar_str = pattern.sub(' |', grammar_str)
        grammar = CFG.fromstring(grammar_str)

    qg = QuantifierGenerator(grammar, upper, max_depth, verbose)

    with open(expr_file, 'w') as f_expr:
        with open(bit_file, 'wb') as f_bit:
            for expr, q_str in qg.generate():
                f_expr.write(f'{expr}\n')
                f_bit.write(q_str.tobytes())
Example #39
 def from_cfg_file(cls, path: str, **kwargs) -> "CFGrammarNode":
     """
     :param path: path to file containing a context-free grammar
     :return: new Derivation tree node
     """
     assert os.path.exists(path)
     with open(path) as file:
         str_grammar = file.read()
     nltk_grammar = CFG.fromstring(str_grammar)
     return cls(nltk_grammar.start(), nltk_grammar, **kwargs)
def chart_parse(in_file, grammar_file, out_file):
    text = unicode(open(in_file, 'r').read(), errors='ignore')
    output = open(out_file, 'w')
    grammar_string = unicode(open(grammar_file, 'r').read(), errors='ignore')
    try:
        grammar = CFG.fromstring(grammar_string)
        parser = nltk.ChartParser(grammar)
        sentences = nltk.sent_tokenize(text)
        for sentence in sentences:
            words = nltk.word_tokenize(sentence)
            tree = parser.parse(words)
            for item in tree:
                output.write(str(item))
                output.write('\n')
    except Exception, e:
        message = "Error with parsing. Check the input files are correct and the grammar contains every word in the input sequence. \n----\n" + str(e)
        sys.stderr.write(message)
        sys.exit()
Example #41
def someGrammaticalDilemmas():
    print "page 292 8.1  Some Grammatical Dilemmas"
    print "=============== Linguistic Data and Unlimited Possibilities ==============="
    from nltk import CFG
    groucho_grammar = CFG.fromstring(""" 
        S -> NP VP
        PP -> P NP
        NP -> Det N | Det N PP | 'I' 
        VP -> V NP | VP PP 
        Det -> 'an' | 'my' 
        N -> 'elephant' | 'pajamas' 
        V -> 'shot'
        P -> 'in'
        """)
    sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']
    parser = nltk.ChartParser(groucho_grammar)
    trees = parser.nbest_parse(sent)
    for tree in trees:
        print tree
Example #42
    def __init__(self, cfg_grammar=None, origin_file='save/origin.txt', oracle_file='save/oracle.txt',
                 wi_dict='save/word_index_dict.json', iw_dict='save/index_word_dict.json',
                 sequence_length=None):
        if cfg_grammar is None:
            cfg_grammar = """
              S -> S PLUS x | S SUB x |  S PROD x | S DIV x | x | '(' S ')'
              PLUS -> '+'
              SUB -> '-'
              PROD -> '*'
              DIV -> '/'
              x -> 'x' | 'y'
            """

        self.grammar = CFG.fromstring(cfg_grammar)
        self.origin_file = origin_file
        self.oracle_file = oracle_file
        self.wi_dict = wi_dict
        self.iw_dict = iw_dict
        self.sequence_length = sequence_length
        self.vocab_size = None
        return
Example #43
def main():
    parser = argparse.ArgumentParser(description='CKY and PCKY')
    parser.add_argument('-g', '--grammar', help='Input file name', required=True)
    parser.add_argument('-s', '--sentence', help='Input sentence', required=True)
    args = parser.parse_args()

    grammar_text = None
    with open(args.grammar, 'r') as f:
        grammar_text = f.read()

    grammar = None
    result = None
    try:
        grammar = CFG.fromstring(grammar_text)
    except ValueError:
        grammar = PCFG.fromstring(grammar_text)

    if type(grammar) is CFG:
        result = cky(args.sentence, grammar)
    elif type(grammar) is PCFG:
        result = pcky(args.sentence, grammar)
def CFG_grammar():
    GOAL_FIND,ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
    usr_goal = ENTITY_PLACE
    usr_find = GOAL_FIND
    VP,NP,O = nonterminals('VP,NP,O')

    # Build a CFG based on the symbols that generated above.
    grammar = CFG.fromstring("""
    VP -> GOAL_FIND O ENTITY_PLACE | GOAL_FIND ENTITY_PLACE
    NP -> P ENTITY_PLACE | ENTITY_PLACE
    GOAL_FIND -> 'find'
    GOAL_FIND  -> 'show'
    GOAL_FIND  -> 'tell'
    O -> 'me'
    P -> 'in'
    ENTITY_PLACE -> 'starbucks'
    ENTITY_PLACE -> 'the starbucks'
    ENTITY_PLACE -> 'a starbucks'
    ENTITY_PLACE -> 'coffee bean'
    ENTITY_PLACE -> 'the coffee bean'
    ENTITY_PLACE -> 'a coffee bean'

    """)
    return grammar
Example #45
# Filter the first sentence of the input and return it.
def eliminate(sentence):
    sents = nltk.sent_tokenize(sentence)
    for sent in sents:
        return filter(sent)  # 'filter' is assumed to be a custom helper defined elsewhere

# Here 'input' is the option chosen in the UI.
# Each question has an ID (as per the NCERT book); 'input' holds the chosen ID.
input=26
# Generate variations of a particular question based on the input and its corresponding grammar.
if input==2:
    g=CFG.fromstring(g1)
    g2=CFG.fromstring(g2)
    rd_parser=nltk.RecursiveDescentParser(g)
    for sent,sent2 in zip(generate(g2,n=100),generate(g,n=100)):
        newsent1=' '.join(sent)
        newsent2=' '.join(sent2)
        ans1=eliminate(newsent1)
        ans2=eliminate(newsent2)
        if(ans1 == None or ans2 == None):
            pass
        else:
            print(ans1)
            print(ans2)
            print("Determine the length and breadth")
            print("\n")
elif input==4:
Example #46
	def load_grammar( self ):
		s = open( self.name + '.cfg' ).read()
		self.grammar = CFG.fromstring(s)
		return
Example #47
def output(request):
    # Validation of form
    if request.method == "POST":
        # Validation of request
        if 'inputURL' in request.POST:
            # Validation of image url
            imageURL = request.POST.get('inputURL')
            image_output = imageURL
            indexOfDot = imageURL.rfind(".")
            if indexOfDot == -1:
                return fail(request) # not an image URL
            indexOfDot += 1
            extension = imageURL[indexOfDot:]
            if extension != 'jpg' and extension != 'jpeg' and extension != 'png':
                return fail(request) # not a valid image (jpg, jpeg, png)
                
            client_id = '8SkASX_SM8xc-fxMF4SdpzS_b9uew8yG0UrQp0y6'
            secret_id = 'EXkfCNxXeiHtnpsxn9Njui_yUpCuvcSAXzfSYjwN'
                
            clarifai_api = ClarifaiApi(client_id, secret_id) # assumes environment variables are set.
            try:
                result = clarifai_api.tag_image_urls(imageURL)
            except ApiError:
                #return fail(request)
                
                messages.add_message(request, messages.INFO, "ApiError")
                return HttpResponseRedirect('makestory/fail.html')
            
            
            class_list = result['results'][0]['result']['tag']['classes']
            prob_list = result['results'][0]['result']['tag']['probs']
            
            class_str = ""
            for i in range(0, len(class_list)):
                class_str += class_list[i] + " " 
            
            # currently just the list of matched words
            text_output = class_list.__str__()
            
            # Parts of speech recognition
            tokens = nltk.word_tokenize(class_str)
            dictionary = PyDictionary()
            
            
            
            nouns = []
            verbs = []
            adjectives = []
            otherPos = []
            for word in tokens:
                definition = dictionary.meaning(word) # https://pypi.python.org/pypi/PyDictionary/1.3.4
                assignment = list(definition.keys())[0] # get the part of speech from the dictionary
                
                # assignment = tuple[1]
                
                if assignment == 'Noun':
                    nouns.append(word)
                elif assignment == 'Verb':
                    verbs.append(word)
                elif assignment == 'Adjective':
                    adjectives.append(word)
                else:
                    otherPos.append(word)
                    
                    
            # Create the grammar
            #P:prepositions, DET:articles, adverbs
            P = ["on","in","at","since","for","ago","before","to","past","to","until","by","in","at","on","under","below","over","above","into","from","of","on","at"]
            DET = ["the","a","one","some","few","a few","the few","some"]
            
            assignments = pos_tag(tokens) # tagset='universal' for ADJ, NOUN, etc.
            
            pos_tags = []
            pos_words = {}
            for tuple in assignments:
                word = tuple[0]
                pos = tuple[1]
                if pos in pos_words:
                    pos_words[pos].append(word)
                else:
                    pos_words[pos] = [word]  # start the list with this word rather than dropping it
                pos_tags.append(pos)
                
                
            
            
            grammar = """
            S -> NP VP
            PP -> P NP
            NP -> Det N | Det N PP
            VP -> V NP | VP PP
            Det -> 'DT'
            """
            # N -> 'NN'
            # V -> 'VBZ'
            # P -> 'PP'
            
            
            # adverb is RB
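            # note: the words joined below are unquoted, so NLTK reads them as
            # nonterminals; quote each word (e.g. "'%s'" % w) to make it a terminal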
            
            if 'NN' in pos_words:
                grammar += 'N ->' + ' | '.join(pos_words['NN']) + '\n'
            
            if 'VB' in pos_words:
                grammar += 'V ->' + ' | '.join(pos_words['VB']) + '\n'
                
            if 'JJ' in pos_words:
                grammar += 'A ->' + ' | '.join(pos_words['JJ']) + '\n'
                
            simple_grammar = CFG.fromstring(grammar)
            #simple_grammar.start()
            simple_grammar.productions()
            
            sentences = []
            for sentence in generate(simple_grammar, n=10):
                sentences.append(' '.join(sentence))
            
            # parser = nltk.ChartParser(simple_grammar)
            # tree = parser.parse(pos_tags)
            


            caption = 'this is a caption'
            story = 'this is the story'
            
            return render(request, 'makestory/output.html',
                {
                'nouns_output': nouns,
                'verbs_output': verbs,
                'adjectives_output': adjectives,
                'otherPos_output': otherPos,
                'imageURL_output': imageURL,
                'caption_output': caption,
                'story_output': story,
                'sentences_test_output': sentences,
                }
            )
Example #48
# Tokenize the sentence.
tokenized = word_tokenize(words)

# Build the grammar for parsing.
GOAL_FIND,ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
usr_goal = ENTITY_PLACE
usr_find = GOAL_FIND
VP,NP,O = nonterminals('VP,NP,O')

grammar = CFG.fromstring("""
VP -> GOAL_FIND O ENTITY_PLACE | GOAL_FIND ENTITY_PLACE
NP -> P ENTITY_PLACE | ENTITY_PLACE
GOAL_FIND -> 'find'
GOAL_FIND  -> 'show'
GOAL_FIND  -> 'tell'
O -> 'me'
P -> 'in'
ENTITY_PLACE -> 'starbucks'
ENTITY_PLACE -> 'Starbucks'
ENTITY_PLACE -> 'Coffee Bean'
ENTITY_PLACE -> 'Coffeebean'

""")
rd_parser = RecursiveDescentParser(grammar)

# Parsing the sentence.
parsed_words = []
for parsing in rd_parser.parse(tokenized):
    print(parsing)

# Find GOAL and ENTITY
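# 'parsing' below is the last tree yielded by the loop above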
for detect in parsing:
import nltk
from nltk import CFG

grammar = CFG.fromstring("""
	S -> NP VP
	NP -> Det Noun | Noun Adj
	VP -> Verb NP
	Det -> 'el'
	Noun -> 'gato' | 'pescado'
	Verb -> 'come'
	Adj -> 'crudo'
	""")

def dibujo_arbol(texto):
	sent = texto.split()
	parser = nltk.ChartParser(grammar)
	for tree in parser.parse(sent):
		print(tree)
		tree.draw()


dibujo_arbol('el gato come pescado crudo')
dibujo_arbol('gato crudo come el gato')
dibujo_arbol('el pescado come gato crudo')
	

from nltk import CFG
from nltk import parse
from nltk import Tree

grammar = CFG.fromstring('''
   S     -> WHO QP QM | WHICH Nom QP QM
   QP    -> VP | DO NP T
   VP    -> I | T NP | BE A | BE NP | VP AND VP
   NP    -> P | AR Nom | Nom
   Nom   -> AN | AN Rel
   AN    -> N | A AN
   Rel   -> WHO VP | NP T
   N     -> "Ns" | "Np"
   I    -> "Is" | "Ip"
   T    -> "Ts" | "Tp"
   A     -> "A"
   P     -> "P"
   BE    -> "BEs" | "BEp"
   DO    -> "DOs" | "DOp"
   AR    -> "AR"
   WHO   -> "WHO"
   WHICH -> "WHICH"
   AND   -> "AND"
   QM    -> "?"
   ''')

chartpsr = parse.ChartParser(grammar)

def all_parses(wlist,lx):
    """returns all possible parse trees for all possible taggings of wlist"""
''' Generate horoscopes '''
import logging
from nltk.grammar import Nonterminal
from nltk import CFG
from os import path
import random
import re

HERE = path.abspath(path.dirname(__file__))

try:
    GRAMMAR = CFG.fromstring(open('%s/data/grammar.txt' % HERE).read())
except IOError:
    logging.error('Unable to load grammar file')
    raise IOError

def get_sentence(start=None, depth=7):
    ''' follow the grammatical patterns to generate a random sentence '''
    if not GRAMMAR:
        return 'Please set a GRAMMAR file'

    start = start if start else GRAMMAR.start()

    if isinstance(start, Nonterminal):
        productions = GRAMMAR.productions(start)
        if not depth:
            # time to break the cycle
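            # (apparent upstream bug: the filter below tests 'start', not 'p',
            # so it never actually selects terminal-only productions)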
            terminals = [p for p in productions if not isinstance(start, Nonterminal)]
            if len(terminals):
                production = terminals
        production = random.choice(productions)
Example #53

from nltk.parse.generate import generate #, demo_grammar
from nltk import CFG


demo_grammar = """
  S -> NP VP
  NP -> Det N
  PP -> P NP
  VP -> 'slept' | 'saw' NP | 'walked' PP
  Det -> 'the' | 'a'
  N -> 'man' | 'park' | 'dog'
  P -> 'in' | 'with'
"""
grammar = CFG.fromstring(demo_grammar)
print(grammar)


# join the words of each generated sentence; n=12 caps how many are produced
for sentence in generate(grammar, n=12):
    print(' '.join(sentence))

'''
Notes: 
Need to symbolize the grammar
Have the machine process the language
Need to integrate with Markov chain - file 'agiliq-markov.py'
'''
for sentence in generate(grammar, depth=4):
    print(' '.join(sentence))
Example #54
def main():
	while True:

		print("Enter a statement")
		statement = raw_input().strip()
		if statement == '':
			continue
		if statement.lower() in ['bye','goodbye','tata','good-bye']:
			print("Good-bye, dear human")
			exit()
		userNameLoader() #loads the username

		tagged_arr = Viterbi(statement)

		tokens = word_tokenize(statement)

		isFile = False
		isDir = False

		#check if all of the elements are same
		count = 1
		tag = tagged_arr[1]
		for i in range(2,len(tagged_arr)):
			if tagged_arr[i] == tag:
				count = count + 1
		
		if count == len(tagged_arr)-1:
			n = len(tokens)
			for i in range(0,n):				
				tag_temp = Viterbi(tokens[i])[1]
				tagged_arr[i+1] = tag_temp

		for i in range(0,len(tokens)):
			if i+2 <= len(tokens):
				if tokens[i] in ['folder','file','directory'] and tagged_arr[i+2] in ['VB','VBN']:
					tagged_arr[i+1] = 'NN'
			elif tokens[i] in ['folder','file','directory'] and tagged_arr[i] in ['VB','VBN']:
					tagged_arr[i+1]='NN'

		for i in range (0,len(tokens)):
			if tagged_arr[i+1] in ['NN','NNS','NP','VB','AN','JJ'] and tokens[i]!= 'open':
				for j in range(0,len(appnames)):
					if tokens[i].lower() in appnames[j] and tokens[i].lower() not in ['file','folder','directory','copy','videos','desktop']:
						tagged_arr[i+1]='AN'
						tokens[i] = commands[j]
						isFile = True
						break
				if isDirName(userName,tokens[i])==True:
						tagged_arr[i+1] = 'AN'
						isDir = True
				elif isFileName(userName,tokens[i])==True:
						tagged_arr[i+1] = 'AN'
						isFile = True

		for i in range (0,len(tokens)):
			if tokens[i] in verbList:
				tagged_arr[i+1] = 'VB'
				break
			elif tokens[i] in ['words','lines']:
				tagged_arr[i+1] = 'NNS'
				break				
		
		#print(tagged_arr)

		grammar_string = """
		  S -> NPP VP
		  S -> VP
		  NPP -> MODAL PRONOUN | NOUN VA | APPNAME
		  NPP -> DET FOLDER VERB NAME | FOLDER VERB NAME| FOLDER NAME | DET NAME
		  NPP -> DET JJ FOLDER VERB NAME | JJ FOLDER VERB NAME| JJ FOLDER NAME
		  NPP -> DET AN FOLDER VERB NAME | AN FOLDER VERB NAME| AN FOLDER NAME
		  NPP -> DET APPNAME
		  NPP -> BACK TONAME | DET BACK TONAME
		  NPP -> WQUERY
		  WQUERY -> WQL AP NOUN | WRB AP NOUN
		  BACK -> 'background' | 'BACKGROUND' | 'Background'
		  BACK -> 'wallpaper' | 'WALLPAPER' | 'Wallpaper'
		  BACK -> AN
		  TONAME -> TO FILENAME | TO DET FILENAME
		  CPY -> DET FILENAME SOURCE DESTINATION | DET FILENAME DESTINATION SOURCE
		  CPY -> FILENAME SOURCE DESTINATION | FILENAME DESTINATION SOURCE
		  SOURCE -> IN SOURCER
		  SOURCER -> DET FOLDER VBN APPNAME | DET FOLDER APPNAME | DET APPNAME
		  SOURCER -> FOLDER VBN APPNAME | FOLDER APPNAME | APPNAME
		  DESTINATION -> TO DESTINATIONR
		  DESTINATIONR -> DET FOLDER VBN APPNAME | DET FOLDER APPNAME | DET APPNAME 
		  DESTINATIONR -> FOLDER VBN APPNAME | FOLDER APPNAME | APPNAME
		  FOLDER -> 'folder'|'directory'|'file'|'Folder'|'File'|'Directory'|'FOLDER'|'FILE'|'DIRECTORY'
		  FOLDER -> NN
		  VP -> VERB NPP | VERB VP | ADVERB VP | VERB CPY
		  VP -> BER RB IN PPS
		  PPS -> DET PP | PP
		  PP -> JJ NOUN | NOUN | FOLDER VBN DET FILENAME | FOLDER VBN FILENAME | FOLDER FILENAME | FOLDER DET FILENAME 
		  PP -> FILENAME
		  MODAL -> MD
		  PRONOUN -> PPSS | PPO
		  VA -> VERB APPNAME
		  APPNAME -> AN
		  VERB -> VB | VBN
		  ADVERB -> RB
		  DET -> AT
		  NOUN -> NN | NP | NNS
		  FILENAME -> AN
		  """
		
		name_rule = 'NAME -> '
		for i in range(1,len(tagged_arr)):
			name_rule += tagged_arr[i]
			if i < len(tagged_arr)-1:
				name_rule += " | "

		name_rule += "\n"

		grammar_string += name_rule
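		# Illustrative only (hypothetical tags): if Viterbi returned
		# ['<start>', 'VB', 'DT', 'NN'], the loop above yields the
		# production "NAME -> VB | DT | NN".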

		#add lexical rules mapping each POS tag to its surface token
		tl = len(tagged_arr)
		for i in range(1,tl):
			if tokens[i-1] not in ['file','folder','directory']:
				grammar_string+=tagged_arr[i]+" -> \'"+tokens[i-1]+"\'\n"

		simple_grammar = CFG.fromstring(grammar_string)
		#print(simple_grammar)

		parser = nltk.ChartParser(simple_grammar)

		json_str = ''
	
		ANs= []
		ANJSON = []
		VBs = []
		VBJSON = []
		NAMEs= []
		NJSON = []
		CCYs = []
		SOURCEs = []
		DESTs = []
		FILENAMEs = []
		TONAMEs = []
		TONAMEFILEs = []
		PPs = []
		PPANs = []
		WQUERY = []
		OBJ = []

		for tree in parser.parse(tokens):
			#print(tree)
			ANs = list(tree.subtrees(filter=lambda x: x.label()=='AN'))
			VBs = list(tree.subtrees(filter=lambda x: x.label()=='VERB'))
			NAMEs = list(tree.subtrees(filter=lambda x: x.label()=='NAME'))
			CCYs = list(tree.subtrees(filter=lambda x:x.label()=='CPY'))
			SOURCEs = list(tree.subtrees(filter=lambda x: x.label()=='SOURCER'))
			SOURCEs = [list(s.subtrees(filter=lambda x: x.label()=='AN')) for s in SOURCEs]
			DESTs = list(tree.subtrees(filter=lambda x: x.label()=='DESTINATIONR'))
			DESTs = [list(d.subtrees(filter=lambda x: x.label()=='AN')) for d in DESTs]
			FILENAMEs = list(tree.subtrees(filter=lambda x: x.label()=='FILENAME'))
			FILENAMEs = [list(f.subtrees(filter=lambda x: x.label()=='AN')) for f in FILENAMEs]
			TONAMEs = list(tree.subtrees(filter=lambda x: x.label()=='TONAME'))
			TONAMEFILEs = [list(t.subtrees(filter=lambda x: x.label()=='AN')) for t in TONAMEs]
			PPs = list(tree.subtrees(filter=lambda x: x.label()=='PP'))
			PPANs = [list(p.subtrees(filter=lambda x: x.label()=='AN')) for p in PPs]
			WQUERY = list(tree.subtrees(filter=lambda x: x.label()=='WQUERY'))
			OBJ = [list(w.subtrees(filter=lambda x: x.label()=='NOUN')) for w in WQUERY]

		if(len(PPANs)>0):
			PPANs = PPANs[0][0]
			PPANs = tree2json(PPANs)
			OBJ = tree2json(OBJ[0][0])
			obj = OBJ['NOUN'][0]
			nounArr = ['NNS','NP','NN']
			for n in nounArr:
				if n in obj:
					obj = obj[n]
					break
			obj = obj[0]
			counter(PPANs['AN'][0],obj)

		for i in range(len(ANs)):
			ANJSON.append(tree2json(ANs[i]))

		for i in range(len(VBs)):
			VBJSON.append(tree2json(VBs[i]))

		for i in range(len(NAMEs)):
			NJSON.append(tree2json(NAMEs[i]))

		for i in range(len(VBs)):
			verbRoot = VBJSON[i]['VERB']
			if 'VB' in verbRoot[0]:
				if verbRoot[0]['VB'][0] in ['open','close','shut','exit']:
					if isFile == True:
						actionSequence(verbRoot[0]['VB'][0],ANJSON,True)
					elif isDir == True:
						actionSequence(verbRoot[0]['VB'][0],ANJSON,False)
				elif verbRoot[0]['VB'][0] in ['make','create']:
					#if isDir == True:
					createSequence(verbRoot[0]['VB'][0],NJSON,name_rule.rstrip('\n'))
				elif verbRoot[0]['VB'][0] in ['copy','cut','move','duplicate']:
					SOURCEs = tree2json(SOURCEs[0][0])
					DESTs = tree2json(DESTs[0][0])
					FILENAMEs = tree2json(FILENAMEs[0][0])
					cutCopy(verbRoot[0]['VB'][0],FILENAMEs,SOURCEs,DESTs)
				elif verbRoot[0]['VB'][0] in ['change','replace']:
					changeWallpaper(verbRoot[0]['VB'][0],tree2json(TONAMEFILEs[0][0]))
Example #55
with_blank_spaces = ' '
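
# Note on the layout below: the trailing #5 / #7 / #5 comments appear to
# count syllables, so each statement is itself shaped like a haiku. Names
# such as a_random, G, N, pentas and this_is_the_grammar are assumed to be
# aliases bound earlier in the original script (e.g. a_random = random,
# G = CFG, N = Nonterminal).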


############################################
############################################
############################################
def choose_line(some_lines):#5
    return a_random.choice(#7
                    some_lines).lower() #5

############################################

############################################
choose = choose_line #5

g = G.fromstring(#7
                    this_is_the_grammar) #5
############################################

############################################
while not len(pentas):#5
    for poem in generate(g, #7
                           start=N('five')): #5
############################################

############################################
      pentas.append(#5
                    with_blank_spaces.join(poem))#7

fives = pentas #5
############################################
regex = re.compile(r"(\w+\s*),(\s*\w+\s*)(,|(and))+(\s*(and)?\s*\w+)")
#reg = re.compile(r"\((,|!|\?)\)\1")
#regex2 = re.compile(r"\((,|!|\?)\)(\s*\w+\s*)+\1")
#regex2 = re.compile(r"\(,\)(\s*\w+\s*)+\1")
regex2 = re.compile(r",(\s*\w+\s*)+,")
#regex3 = re.compile(r"!(\s*\w+\s*)+!")
#regex3 = re.compile(r"\((\s*\w+\s*)+\)(\s*\w+\s*)*\((,|!|\?)\)\1(\s*\w+\s*)*\2\1(\s*\w+\s*)*\2?")
#regex4 = re.compile(r"(\s*\w+\s*)*\((\s*\w+\s*)+\)\((,|!|\?)\)(\s*\w+\s*)*\1\2(\s*\w+\s*)*\1\2?")
#triple_to_dist = {}
list_reg = re.compile(r"(\w|\s)\s*\)")
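# Hedged reading of the active patterns above: `regex` appears to target
# comma lists such as "cats, dogs, and birds"; `regex2` a span between two
# commas such as ", of course,"; and `list_reg` a word character or space
# just before a closing parenthesis.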
grammar1 = CFG.fromstring("""
   S -> NP VP
   PP -> P NP
   NP -> Det N | Det N PP | Det A N | A N | N PP | "PRP$" N | N | "PRP$" A N | A N PP | N A PP | NP CC NP | NP NP NP | NP NP CC NP
   VP -> V NP | VP NP | VP PP | AV V | AV V NP | V AV | V AV NP | VP PP | V | VP CC VP | VP VP VP | VP VP CC VP
   Det -> "DT"
   V -> "VBZ" | "VB" | "VBG" | "VBN" | "VBD" | "VBP"
   P -> "PP" | "IN"
   A -> "JJ" | "JJR" | "JJS"
   AV -> "RB" | "RBR" | "RBS"
   N -> "NN" | "NNS" | "NNP" | "NNPS" | "PRP" | "CD"
  """)
parser1 = nltk.ChartParser(grammar1)
grammar2 = CFG.fromstring("""
   T -> S S S
   S -> NP VP
   PP -> P NP
   NP -> Det N | Det N PP | Det A N | A N | N PP | "PRP$" N | N | "PRP$" A N | A N PP | N A PP | NP CC NP | NP NP NP | NP NP CC NP
   VP -> V NP | VP NP | VP PP | AV V | AV V NP | V AV | V AV NP | VP PP | V | VP CC VP | VP VP VP | VP VP CC VP
   Det -> "DT"
   V -> "VBZ" | "VB" | "VBG" | "VBN" | "VBD" | "VBP"
   P -> "PP" | "IN"
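# Hedged continuation of the truncated example: the productions above mirror
# grammar1, so the original presumably built a second chart parser the same
# way as parser1.
# parser2 = nltk.ChartParser(grammar2)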
Example #57
#!/usr/bin/env python3.5
from nltk import RecursiveDescentParser, pos_tag, CFG, Tree
from nltk.parse.earleychart import EarleyChartParser
from nltk.draw import TreeView
from os import system, remove

grammar1  = CFG.fromstring("""S -> NP VP
    PP -> P | P NP | P VP
    NP -> Det NP PP1 | Adj N PP1 | N PP1 | N NP PP1
    PP1 -> PP PP1 | 
    VP -> V NP PP1 | V PP1
    Det -> 'DT'
    N -> 'NN' | 'NNS' | 'NNPS' | 'NNP' | 'PRP' | 'PRP$'
    V -> 'VBZ' | 'VBD' | 'VBP' | 'VBG'
    Adj -> 'JJ'
    P -> 'IN'""")

grammar2 = CFG.fromstring("""S -> NP VP
        PP -> P | PP NP | PP VP
        NP -> Det NP | Adj NP | N NP | NP PP | N
        VP -> VP NP | VP PP | V
        Det -> 'DT'
        N -> 'NN' | 'NNS' | 'NNPS' | 'NNP' | 'PRP' | 'PRP$'
        V -> 'VBZ' | 'VBD' | 'VBP' | 'VBG'
        Adj -> 'JJ'
        P -> 'IN'""")

grammar = grammar1

rdparser, earlyparser = RecursiveDescentParser(grammar), EarleyChartParser(grammar)
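
# Hedged usage sketch: both parsers consume POS-tag sequences rather than
# raw words (pos_tag is imported above; word_tokenize is an assumed extra
# import from nltk).
from nltk import word_tokenize
tags = [tag for _, tag in pos_tag(word_tokenize('the dog slept'))]
for tree in earlyparser.parse(tags):
    print(tree)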
Example #58
#!/usr/bin/env python3.5
from nltk import RecursiveDescentParser, CFG, pos_tag, word_tokenize
from nltk.draw.tree import TreeView
from os import system, remove

rdparser = RecursiveDescentParser(CFG.fromstring("""S -> NP VP
    PP -> P | P NP | P VP
    NP -> Det NP PP1 | Adj N PP1 | N PP1 | N NP PP1
    PP1 -> PP PP1 | 
    VP -> V NP PP1 | V PP1
    Det -> 'DT'
    N -> 'NN' | 'NNS' | 'NNPS' | 'NNP' | 'PRP' | 'PRP$'
    V -> 'VBZ' | 'VBD' | 'VBP' | 'VBG'
    Adj -> 'JJ'
    P -> 'IN'"""))

taggedsent = pos_tag(word_tokenize(''.join(c for c in input('Enter a sentence:') if c not in ':,;."')))
j = 1
for tree in rdparser.parse([x[1] for x in taggedsent]):
    # the parser works over POS tags, so substitute the original words back
    # into the leaves of each resulting parse tree
    i = iter(taggedsent)
    for s in tree.subtrees():
        if len(s) == 1: s[0] = next(i)[0]
    # render the tree to PostScript with TreeView, convert it to PNG via
    # ImageMagick's `convert`, and fall back to printing if conversion fails
    tv = TreeView(tree)
    tv._size.set(18)
    tv.resize()
    tv._cframe.canvas()['scrollregion'] = (0, 0, 1000, 500)
    tv._cframe.print_to_file('output' + str(j) + '.ps')
    if system('convert output' + str(j) + '.ps -alpha off output' + str(j) + '.png') != 0:
        print(tree)
    remove('output' + str(j) + '.ps')
    j += 1
Example #59
    def __init__(self, grammar):
        self.grammar = nltkCFG.fromstring(grammar)
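        # Hedged note: `nltkCFG` is assumed to be an alias created elsewhere
        # in the original file, e.g. `from nltk import CFG as nltkCFG`.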