def __init__(self):
    # Comment about what each part of speech is:
    """
    CC - conjunction: or, but, and, either
    CD - number: one, two, three
    DT - determiner: a, an, the, both, all, these, any, some
    EX - the word 'there'
    IN - preposition: in, of, with, for, under, among, upon, at
    JJ - adjective: certain, curious, little, golden, other, offended
    JJS - adjective: -est : best, loveliest, largest
    JJR - adjective: -er : larger, smaller, worse
    MD - can, dare, should, will*, might, could, must
    NN - common singular noun
    NNS - common plural noun
    NNP - proper singular noun
    NNPS - proper plural noun
    PDT - all, both, quite, many, half
    PRP - hers, her, himself, thy, us, it, I, him, you, they
    PRPP - possessive: his, mine, our, my, her, its, your
    RB - adverb: very, not, here, there, first, just, down, again, beautifully, -ly
    RBR - more
    RBS - adverb superlative: -est
    RP - particle: up, down, out, away, over, off
    TO - the word 'to'
    UH - interjection
    VB - base-form verb: to ___
    VBD - past verb: -ed : was*(freq. occur), had, dipped, were, said, seemed
    VBG - present verb: -ing: trembling, trying, getting, running, swimming
    VBN - past verb descriptive: crowded, mutated, fallen, lit, lost, forgotten
    VBP - present verb: not -s: am, wish, make, know, do, find
    VBZ - present verb: -s : is*, has, seems
    WDT - what, which, that*
    WP - who, what
    WRB - how, whenever, where, why, when
    """
    # create base of cfg
    g = CFG.fromstring("""
    S -> NPS VPS | NPS VPS | NPS VPS | NPP VPP | VPO | NPO
    S -> NPS VPS | NPP VPP | NPS VPS
    NPS -> 'DT' 'NN' | 'DT' 'NN' | 'DT' 'JJ' 'NN' | 'DT' 'JJ' 'NN'
    NPS -> 'EX' 'the' 'NN' | 'the' 'JJS' 'NN'
    NPS -> 'she' | 'he' | 'it' | 'I'
    NPS -> NPS INP | INP NPS
    NPP -> 'the' 'NNS' | 'the' 'NNS' | 'NNS'
    NPP -> 'the' 'JJ' 'NNS'
    NPP -> 'they' | 'you' | 'we'
    VING -> 'VBG' | 'VBG' | 'RB' 'VBG'
    VBB -> 'VB' | 'VB' | 'VBP'
    SM -> 'is' | 'was' | 'has been'
    VPS -> SM 'VBN' | SM 'VBN' 'like the' 'JJ' 'NN'
    VPS -> SM VING | SM VING INP
    VPS -> SM VING 'like' 'DT' 'JJ' 'NN'
    VPS -> SM VING 'like a' 'NN' INP
    VPS -> SM 'as' 'JJ' 'as' 'JJ'
    VPS -> SM 'a' 'JJ' 'NN'
    VPS -> SM 'a' 'NN' INP
    VPS -> 'MD' 'have been' VING
    VPS -> 'is' 'JJ' 'and' 'JJ'
    VPS -> 'VBD' INP | 'RB' 'VBD'
    VPS -> SM 'VBD' 'like' 'DT' 'JJ' 'NN'
    VPS -> SM 'as' 'JJ' 'as the' 'NN'
    VPS -> 'VBD' 'NN' | 'VBD' 'DT' 'NN'
    VPS -> 'VBD' 'and' 'VBD' INP 'until' 'VBN'
    VPS -> VPS 'and' S
    VPS -> 'VBD' 'JJR' 'than' 'a' 'NN'
    VPS -> 'VBD' 'EX'
    VPS -> SM 'JJ' | SM 'VB' INP
    NPO -> 'a' 'NN' 'IN' 'NNP'
    NPO -> 'the' 'NN' 'IN' 'the' 'JJ' 'NNP'
    NPO -> 'the' 'NNS' 'IN' 'the' 'NN'
    VPO -> 'VBG' 'like' 'NNP' 'RP' 'DT' 'JJ' 'NN' 'IN' 'DT' 'NN'
    VPO -> 'has been' 'VBG' 'RP' 'and' 'VBG'
    PM -> 'are' | 'were' | 'have been'
    VPP -> PM VING | PM VING INP
    VPP -> PM VING 'like the' 'NNS' INP
    VPP -> PM 'as' 'JJ' 'as' NPS INP | PM 'JJ' 'like' 'NNS' | PM 'JJ' 'like' VBG 'NNS'
    VPP -> PM 'VBN' | PM 'VBN' INP
    VPP -> PM 'as' 'JJ' 'as' 'JJ' | PM 'as' 'JJ' 'as' 'VBG' 'NNS'
    VPP -> PM 'NNS' INP
    VPP -> PM 'JJ' 'NNS'
    VPP -> 'are' 'JJ' 'and' 'JJ'
    VPP -> 'VBD' INP | 'VBD' 'RP' INP
    VPP -> PM 'JJ' | PM 'VB' INP
    INP -> 'IN' 'DT' 'NN' | 'IN' 'the' 'NNS' | 'IN' 'the' 'JJ' 'NNS'
    INP -> 'IN' 'DT' 'NN' 'IN' 'DT' 'NN'
    INP -> 'IN' 'DT' 'JJ' 'NN' | 'RP' 'IN' 'DT' 'JJ' 'NN'
    INP -> 'RP' 'IN' 'DT' 'NN' | 'IN' 'JJ' 'NNS'
    INP -> 'IN' 'DT' 'NN' | 'RP' 'DT' 'NNS'
    """)
    # save grammar to self.cfg (the first line of str(g) is the
    # "Grammar with N productions" header, so it is dropped)
    self.cfg = CFG.fromstring(str(g).split('\n')[1:])
    self.cfg._start = g.start()
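# Usage sketch for the grammar stored above (the holder's class name is not shown,
# so `grammar_holder` and `sample_templates` are illustrative names, not from the
# source). Each generated "sentence" is a sequence of POS tags plus literal words
# that a caller would later fill in with vocabulary.
from nltk.parse.generate import generate as cfg_generate

def sample_templates(grammar_holder, n=5, depth=6):
    # a depth cap is needed because rules like VPS -> VPS 'and' S are recursive
    return [' '.join(toks) for toks in cfg_generate(grammar_holder.cfg, n=n, depth=depth)]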
def verifygrammar(label, codestring, varname): regexp_tagger = RegexpTagger([ (r"^[0-9]+$", "decimal"), (r"^0x[0-9A-Fa-f]+$", "hexadecimal"), ]) # VARIABLE LINE GENERATION - Assumption - Complex numbers data types are ignored for data mining algorithms if label.tag == 'var': varGrammar = CFG.fromstring(""" S -> VN "=" VV VN -> """ + varname + """ VV -> I | D | ST | B B -> True | False I -> I N | N D -> I"."F F -> F N | N ST -> "'"STI"'" STI -> S N | S C | N | C N -> 0|1|2|3|4|5|6|7|8|9 C -> a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z """) elif label.tag == 'array': arrayGrammar = CFG.fromstring(""" S -> AN "= [" AE "]" AN -> """ + varname + """ AE -> VV AE | VV VV -> I | D | ST | B B -> True | False I -> I N | N D -> I"."F F -> F N | N ST -> "'"STI"'" STI -> S N | S C | N | C N -> 0|1|2|3|4|5|6|7|8|9 C -> a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z """)
def train(): print("Collecting sub-corpus from Penn Treebank (nltk.corpus)") # prepare parsing trees, extrated from treebank tbank_trees = [] for sent in treebank.parsed_sents(): sent.chomsky_normal_form() tbank_trees.append(sent) # build vocabulary list, extracted from treebank vocab_size = 10000 # set vocabulary size to 10000 words = [wrd.lower() for wrd in treebank.words()] vocab = [wrd for wrd,freq in Counter(treebank.words()).most_common(vocab_size)] # generate grammar rules list, extracted from treebank. and calculate their probablity based their frequency tbank_productions = set(production for tree in tbank_trees for production in tree.productions()) tbank_grammar = CFG(Nonterminal('S'), list(tbank_productions)) production_rules = tbank_grammar.productions() rules_to_prob = defaultdict(int) nonterm_occurrence = defaultdict(int) #calculate probablity for rules for sent in tbank_trees: for production in sent.productions(): if len(production.rhs()) == 1 and not isinstance(production.rhs()[0], Nonterminal): production = Production(production.lhs(), [production.rhs()[0].lower()]) nonterm_occurrence[production.lhs()] += 1 rules_to_prob[production] += 1 for rule in rules_to_prob: rules_to_prob[rule] /= nonterm_occurrence[rule.lhs()] # use Katz smoothing rules_to_prob, vocab = katz_smooth(rules_to_prob, vocab) rules = list(rules_to_prob.keys()) rules_reverse_dict = dict((j,i) for i, j in enumerate(rules)) left_rules = defaultdict(set) right_rules = defaultdict(set) unary_rules = defaultdict(set) # classify left, right rules for rule in rules: if len(rule.rhs()) > 1: left_rules[rule.rhs()[0]].add(rule) right_rules[rule.rhs()[1]].add(rule) else: unary_rules[rule.rhs()[0]].add(rule) terminal_nonterms_rules = set(rule for rule in rules_to_prob if len(rule.rhs()) == 1 and isinstance(rule.rhs()[0], str)) terminal_nonterms = defaultdict(int) for rule in terminal_nonterms_rules: terminal_nonterms[rule.lhs()] += 1 pcfg_parser = { 'vocab': vocab, 'left_rules': left_rules, 'right_rules': right_rules, 'unary_rules': unary_rules, 'rules_to_prob': rules_to_prob, 'terminal_nonterms': terminal_nonterms } return pcfg_parser
def respondQuestion(sentence, keyWord, POS):
    if "Tell me" not in sentence:
        grammar = ""
        if POS == "NNPS" or POS == "NNS":
            grammar = CFG.fromstring("""
            S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
            H-NP1 -> 'How'
            Wh-NP -> 'Who' | 'What' | 'Where' | 'What'
            Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
            NP -> Pronoun | Proper-Noun | Noun
            Pronoun -> 'they' | 'those'
            Proper-Noun -> '[]'
            Noun -> 'the <>'
            VP -> Verb NP
            Verb -> 'are'
            """)
        elif POS == "NN" or POS == "NNP":  # was `POS == "NN" or "NNP"`, which is always true
            grammar = CFG.fromstring("""
            S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
            H-NP1 -> 'How'
            Wh-NP -> 'Who' | 'What' | 'Where' | 'What'
            Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
            NP -> Pronoun | Proper-Noun | Noun
            Pronoun -> 'it' | 'that'
            Proper-Noun -> '[]'
            Noun -> 'the <>'
            VP -> Verb NP
            Verb -> 'is'
            """)
        rand_sent_list = []
        response = ""
        for sentence in generate(grammar):
            rand_sent_list.append(' '.join(sentence))
        while True:
            num = randint(0, len(rand_sent_list) - 1)
            response = rand_sent_list[num]
            if "<>" in response and (POS == "NNS" or POS == "NN"):
                index = response.index("<>")
                response = response[:index] + keyWord + response[index + 2:]
                break
            if "[]" in response and (POS == "NNPS" or POS == "NNP"):
                index = response.index("[]")
                response = response[:index] + keyWord + response[index + 2:]
                break
            if "<>" not in response and "[]" not in response:
                break
        return response
    else:
        knowledgeRep(sentence)
def generate_pairs(depth, cfg):
    '''
    depth: integer for the depth of the parse tree in the CFG
    cfg: chosen grammar, 1, 2 or 3
    '''
    if cfg == 1:
        grammar = CFG.fromstring("""
        S -> Y
        Y -> a Y b | a Y | a |
        a -> '(' ')'
        b -> '{' '}'
        """)
    elif cfg == 2:
        grammar = CFG.fromstring("""
        S -> X | Y | X Y
        X -> a
        Y -> b
        a -> '(' a ')' |
        b -> '{' b '}' |
        """)
    elif cfg == 3:
        grammar = CFG.fromstring("""
        S -> X
        X -> a | b
        a -> '(' a ')' |
        b -> '{' b '}' | '{' a '}'
        """)
    trg = list(generate(grammar, depth=depth))
    trg_list = []
    for sentence in trg:
        k = ''.join(sentence)
        trg_list.append(k)
    src_list = trg2src(trg)
    if cfg == 1:
        A = list((s + 'A ' for s in src_list))
    elif cfg == 2:
        A = list((s + 'B ' for s in src_list))
    elif cfg == 3:
        A = list((s + 'C ' for s in src_list))
    else:
        A = None  # unreachable for cfg in {1, 2, 3}
    B = list((s for s in trg_list))
    df = pd.concat([pd.Series(A), pd.Series(B)], axis=1)
    pairs = (df.iloc[:, 0] + df.iloc[:, 1]).values.tolist()
    return pairs
def gen_grammar_plural(verb, direct_object, count):
    try:
        verb = en.verb.present_participle(verb)
    except KeyError:
        return
    if verb != "":
        g1 = """
        S -> WA TR SUB V DO '?' | W TR SUB V '?'
        W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
        WA -> 'when' | 'where' | 'why' | 'how'
        TR -> 'are' | 'were'
        SUB -> 'they' | 'you'
        V -> '%s'
        DO -> 'the %s'
        """ % (verb, direct_object)
        grammar1 = CFG.fromstring(g1)
        multiplier = 1
        with open('sentences.csv', 'ab') as csvwriter:
            writer = csv.writer(csvwriter)
            for sentence in generate(grammar1, n=999):
                sentence = ' '.join(sentence)
                if sentence.find('who') == 0:
                    multiplier = 1
                if sentence.find('what') == 0:
                    multiplier = 1
                if sentence.find('when') == 0:
                    multiplier = 2
                if sentence.find('where') == 0:
                    multiplier = 2
                if sentence.find('why') == 0:
                    multiplier = 4
                if sentence.find('how') == 0:
                    multiplier = 4
                # `sentence` is already a joined string here; joining again would
                # put a space between every character
                writer.writerow((sentence, multiplier * count))
def grammar_extraction(population_g, inital_state, subs): population_s = {} for pop in population_g: p = [inital_state] for n in population_g[pop]: if n != 0: p.append(n) else: pass separ = "\n" prime_grammar = separ.join(p) pre_grammar = prime_grammar.format(subs=subs) pos_grammar = """ {seed} """.format(seed=prime_grammar) post_grammar = """ {seed} """.format(seed=pre_grammar) grammar_use = CFG.fromstring(post_grammar) population_s[pop] = (grammar_use, pos_grammar) return population_s
def main(): parser = argparse.ArgumentParser(description='CKY and PCKY') parser.add_argument('-g', '--grammar', help='Input file name', required=True) parser.add_argument('-s', '--sentence', help='Input sentence', required=True) args = parser.parse_args() grammar_text = None with open(args.grammar, 'r') as f: grammar_text = f.read() grammar = None result = None try: grammar = CFG.fromstring(grammar_text) except ValueError: grammar = PCFG.fromstring(grammar_text) if type(grammar) is CFG: result = cky(args.sentence, grammar) elif type(grammar) is PCFG: result = pcky(args.sentence, grammar)
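# Illustrative grammar files this script could be pointed at with -g (hypothetical
# content, not from the source). A file without probabilities loads via
# CFG.fromstring and is routed to cky(); bracketed probabilities make
# CFG.fromstring raise ValueError, so the same text loads as a PCFG and is routed
# to pcky().
#
# plain CFG (cky):
#   S -> NP VP
#   NP -> 'they' | 'fish'
#   VP -> V NP
#   V -> 'eat'
#
# PCFG (pcky) -- probabilities for each left-hand side must sum to 1:
#   S -> NP VP [1.0]
#   NP -> 'they' [0.5] | 'fish' [0.5]
#   VP -> V NP [1.0]
#   V -> 'eat' [1.0]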
def gen_grammar3_past_plural(verb, direct_object, count):
    g1 = """
    S -> W TR SUB V '?' | WA TR SUB V DO '?'
    W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
    WA -> 'when' | 'where' | 'why' | 'how'
    TR -> 'have'
    SUB -> PRO
    PRO -> 'they' | 'you'
    V -> '%s'
    DO -> 'the %s'
    """ % (verb, direct_object)
    grammar1 = CFG.fromstring(g1)
    multiplier = 0
    with open('sentences.csv', 'ab') as csvwriter:
        writer = csv.writer(csvwriter)
        for sentence in generate(grammar1, n=999):
            # generate() yields token lists; join them before matching prefixes,
            # otherwise .find() is called on a list and raises AttributeError
            sentence = ' '.join(sentence)
            if sentence.find('who') == 0:
                multiplier = 1
            if sentence.find('what') == 0:
                multiplier = 1
            if sentence.find('when') == 0:
                multiplier = 2
            if sentence.find('where') == 0:
                multiplier = 2
            if sentence.find('why') == 0:
                multiplier = 4
            if sentence.find('how') == 0:
                multiplier = 4
            writer.writerow((sentence, multiplier * count))
def rand_sentences(n=10, depth=6, wpt=0.25): #grammar = CFG.fromstring(open('assets/text/grammar.txt', 'r').read()) grammar = CFG.fromstring(rand_vocabulary(wpt)) sentences = list(generate(grammar, n=n * 20, depth=depth)) return [ ' '.join(i) for i in random.sample(sentences, min(n, len(sentences))) ]
def __init__(self, cfg_grammar=None, origin_file='save/origin.txt', oracle_file='save/oracle.txt', wi_dict='save/word_index_dict.json', iw_dict='save/index_word_dict.json', sequence_length=None, generate_from_scratch=False): if cfg_grammar is None: cfg_grammar = """ S -> S PLUS x | S SUB x | S PROD x | S DIV x | x | '(' S ')' PLUS -> '+' SUB -> '-' PROD -> '*' DIV -> '/' x -> 'x' | 'y' """ self.grammar = CFG.fromstring(cfg_grammar) self.origin_file = origin_file self.oracle_file = oracle_file self.wi_dict = wi_dict self.iw_dict = iw_dict self.sequence_length = sequence_length self.generate_from_scratch = generate_from_scratch self.vocab_size = None import os, inspect self.saving_path = os.path.dirname( os.path.abspath(inspect.getfile( inspect.currentframe()))) + '/save/' return
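# Standalone sketch of what the default expression grammar above produces (the
# depth cap is needed because S is left-recursive; outputs shown are examples).
from nltk import CFG
from nltk.parse.generate import generate

expr_grammar = CFG.fromstring("""
S -> S PLUS x | S SUB x | S PROD x | S DIV x | x | '(' S ')'
PLUS -> '+'
SUB -> '-'
PROD -> '*'
DIV -> '/'
x -> 'x' | 'y'
""")
for expr in generate(expr_grammar, depth=4, n=8):
    print(' '.join(expr))  # x + x, x + y, y + x, ...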
def __init__(self, blackboard): super(SentenceExpert, self).__init__(blackboard, "Sentence Expert") self.eva = ["be", "look", "feel"] self.atv = ["like", "hate", "love", "know", "need", "see"] """ eva - emotional verb active evp - emotional verb passive ej - emotion adjective en - emotional noun atv - attitude verb """ self.grammar = CFG.fromstring(""" S -> P | EP | Person ATV NP P -> NP VP EP -> Person EVA EJ | NP EVP Pron EJ | ENP VP ENP -> EN OF NP NP -> Det N | Det JJ N | Det EJ JJ N | Det EJ N | Det EN VP -> V | V ERB | ERB V Det -> 'the' N -> 'n' V -> 'v' EVA -> 'eva' EVP -> 'makes' EN -> 'en' EJ -> 'ej' JJ -> 'adj' ERB -> 'erb' ATV -> 'atv' Person -> 'person' Pron -> 'pron' OF -> 'of' CC -> 'and' | 'but' | 'because' | 'so' """)
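# Sketch of how the placeholder terminals above ('n', 'v', 'adj', 'eva', ...) might
# be realised as words (assumption: the blackboard supplies word lists; the lexicon
# below is illustrative only, not from the source). The grammar is non-recursive,
# so generate() needs no depth cap.
import random
from nltk.parse.generate import generate

def realise(grammar, lexicon):
    template = random.choice(list(generate(grammar)))
    # placeholders not in the lexicon (e.g. 'makes') pass through unchanged
    return ' '.join(random.choice(lexicon.get(tok, [tok])) for tok in template)

lexicon = {'n': ['dream'], 'v': ['fades'], 'adj': ['quiet'], 'ej': ['lonely'],
           'en': ['sorrow'], 'eva': ['feel'], 'erb': ['softly'], 'atv': ['love'],
           'person': ['she'], 'pron': ['me']}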
def get_pos_tags(pos_tuples): """ Returns the POS tags from POS tuples of (word, tag) Updates the grammar for unknown tags """ global grammar_string global grammar global terminals changed_grammar = False pos_tags = [] for pos_tuple in pos_tuples: tag = pos_tuple[1] if tag not in terminals: if tag == '\'\'': tag = 'APOS' grammar_string += ' | \'' + tag + '\'' terminals[tag] = None changed_grammar = True pos_tags.append(tag) if changed_grammar: grammar = CFG.fromstring(grammar_string) return pos_tags
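# The globals used above are not shown; a minimal shape they could take (an
# assumption, for illustration) is a grammar string whose last rule lists the
# POS-tag terminals, so that appending " | 'TAG'" extends that rule, plus a dict
# of the tags already known:
#
#   grammar_string = """
#   S -> TAG | TAG S
#   TAG -> 'DT' | 'NN' | 'VBZ' | 'JJ' | 'IN'
#   """.rstrip()
#   terminals = {'DT': None, 'NN': None, 'VBZ': None, 'JJ': None, 'IN': None}
#   grammar = CFG.fromstring(grammar_string)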
def execute(text: str):
    groucho_grammar = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
    """)
    parser = ChartParser(groucho_grammar)
    # tokenize the text that was passed in (previously a hard-coded sample was
    # tokenized and then a different, uncovered token list was parsed)
    tokens = word_tokenize(text)
    print(type(tokens))
    print(tokens)
    for tree in parser.parse(tokens):
        print(tree)
def __init__(self, phonemes=None, onset=None, coda=None): self.phonemes = phonemes or Phoneme() # use CFG to structure syllables if onset == None: # optional onset onset = 'C | C C | \' \'' elif onset: # mandatory onset onset = 'C | C C' else: # no onset onset = '\' \'' if coda == None: # optional coda coda = 'C | \' \'' elif coda: # mandatory coda coda = 'C' else: # no coda coda = '\' \'' # nucleus is always present # based on the "typical model" grammar = ''' S -> O V K O -> %s K -> %s C -> \'c\' V -> \'v\' ''' % (onset, coda) self.grammar = CFG.fromstring(grammar) self.syllables = self.generate_syllables()
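# Quick check of the "typical model" above with both onset and coda optional: the
# CFG enumerates every c/v pattern a syllable can take (standalone sketch, names
# are illustrative).
from nltk import CFG
from nltk.parse.generate import generate

typical = CFG.fromstring("""
S -> O V K
O -> C | C C | ' '
K -> C | ' '
C -> 'c'
V -> 'v'
""")
for pattern in generate(typical):
    print(''.join(pattern).strip())  # cvc, cv, ccvc, ccv, vc, v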
def Tweet_content1():
    grammar = CFG.fromstring(demo_grammar)
    # n=4 yields only the first four sentences from the grammar (it is not a depth
    # or word-count limit)
    for sentence in generate(grammar, n=4):
        print(' '.join(sentence))
    return sentence
def draw_1(s): m = s l = fool.cut(s)[0] print(l) p = product_grammar(m) grammar = CFG.fromstring(""" S ->NP V NP U L|NP U NP V L| NP U L V NP|L U NP V NP|L V NP U NP|NP V L U NP NP -> N N|r NP|NP A NP|M Q NP|N|NP U NP|A U NP|N NP|NP C NP|NP U|M NP VP ->V|V NP|V VP|A VP|VP NP|VP U|VP C VP|VP P|VP uguo V -> v|vi|vshi N ->n|nr|t|ns|f|nx|nz R ->r C ->c P ->p L ->R|R NP U ->ude|y A ->a|d|ad M ->m Q ->q """ + p) cp = nltk.ChartParser(grammar) tree = cp.parse(l) stree = [] for s in tree: st = [] #s.draw() for i in range(len(s)): st.append([s[i].label(), ''.join(s[i].leaves())]) stree.append(st) return stree
def generate(self, n=10, verb='intransitive', rc='none', pp='none', ident=False): """ Generate input-output pairs with the main auxiliary in the given language. Arguments specify whether the verb should be transitive or intransitive, the position of the relative clause, and the position of the prepositional phrase. The vocabulary used in this function is a random sample (class-wise) if the entire vocabulary to allow for generating sentences in a reasonable amount of time. Args: n: integer number of pairs to be generated verb: 'transitive' or 'intransitive', type of verb rc: 'none', 'subject', or 'object', position of relative clause pp: 'none', 'subject', or 'object', position of prepositional phrase ident: boolean indicating whether output is identical sentence or question Return: list of tuples (input, output, main_aux) """ grammar = CFG.fromstring(self.get_grammar_string(verb, rc, pp)) sentences = list() for sentence in generate_from_cfg(grammar, n=n): sentences.append(Language.transform(sentence, ident)) return sentences
def demo(): """ A demonstration of the recursive descent parser. """ from nltk import parse, CFG grammar = CFG.fromstring(""" S -> NP VP NP -> Det N | Det N PP VP -> V NP | V NP PP PP -> P NP NP -> 'I' N -> 'man' | 'park' | 'telescope' | 'dog' Det -> 'the' | 'a' P -> 'in' | 'with' V -> 'saw' """) for prod in grammar.productions(): print(prod) sent = 'I saw a man in the park'.split() parser = parse.RecursiveDescentParser(grammar, trace=2) for p in parser.parse(sent): print(p)
def demo(): """ A demonstration of the shift-reduce parser. """ from nltk import parse, CFG grammar = CFG.fromstring( """ S -> NP VP NP -> Det N | Det N PP VP -> V NP | V NP PP PP -> P NP NP -> 'I' N -> 'man' | 'park' | 'telescope' | 'dog' Det -> 'the' | 'a' P -> 'in' | 'with' V -> 'saw' """ ) sent = 'I saw a man in the park'.split() parser = parse.ShiftReduceParser(grammar, trace=2) for p in parser.parse(sent): print(p)
def setUp(self): if not exists(self.LEXICON_FILE_NAME): self.skipTest("Unable to find file {} as lexicon".format( self.LEXICON_FILE_NAME)) if not exists(self.GRAMMAR_FILE_NAME): self.skipTest("Unable to find file {} as grammar".format( self.GRAMMAR_FILE_NAME)) assert exists(self.PARSE_TREES_FILE_NAME) valid,lexiconText = q1utils.sanitizeAndValidateLexicon( self.LEXICON_FILE_NAME) if not valid: self.skipTest("Lexicon {} is invalid.".format( self.LEXICON_FILE_NAME)) valid,grammarText = q1utils.sanitizeAndValidateGrammar( self.GRAMMAR_FILE_NAME) if not valid: self.skipTest("Grammar {} is invalid.".format( self.GRAMMAR_FILE_NAME)) allRules = grammarText + '\n' + lexiconText try: grammar = CFG.fromstring(allRules) self._parser = BottomUpChartParser(grammar) except Exception as e: self.skipTest(str(e))
def restore(sents, mint=None, maxt=None, minh=None, maxh=None): """Get best infered grammar Parameters ---------- sents: collection of str sentences to use in restoration mint: int check up values of t starting from this value maxt: int check up values of t up to this value minh: int check up values of h starting from this value maxh: int check up values of h up to this value Returns ------- grammar : nltk.CFG """ res = restore_all(sents, mint, maxt, minh, maxh) simplest = min(res.values(), key=cmp_to_key(_cmp_grammar_simplicity)) return CFG.fromstring(simplest)
def __init__(self, cfg_grammar=None, origin_file='save/origin.txt', oracle_file='save/oracle.txt', wi_dict='save/word_index_dict.json', iw_dict='save/index_word_dict.json', sequence_length=None): if cfg_grammar is None: cfg_grammar = """ S -> S PLUS x | S SUB x | S PROD x | S DIV x | x | '(' S ')' PLUS -> '+' SUB -> '-' PROD -> '*' DIV -> '/' x -> 'x' | 'y' """ self.grammar = CFG.fromstring(cfg_grammar) self.origin_file = origin_file self.oracle_file = oracle_file self.wi_dict = wi_dict self.iw_dict = iw_dict self.sequence_length = sequence_length self.vocab_size = None return
def do_grammar_tests(): from nltk import CFG grammar_files = [ 'grammar-mpropp.txt', 'grammar-mpropp2.txt', 'grammar-lakoff.txt', 'grammar-gervas.txt', 'grammar-finlayson.txt' ] grammar_test = [ i.split() for i in open('data/grammar-test-filtered.txt').readlines() ] for i in grammar_files: grammar_file = 'data/' + i print grammar_file, '\t', g = CFG.fromstring(open(grammar_file).read()) #pprint.pprint(g.productions()) coverage = True for i, tokens in enumerate(grammar_test): try: g.check_coverage(tokens) print 1, except Exception as e: print 0, #,e` coverage = False print #rdp = nltk.RecursiveDescentParser(g) #srp = nltk.ShiftReduceParser(g) #bulccp = nltk.BottomUpLeftCornerChartParser(g) if coverage: for i, tokens in enumerate(grammar_test): pass
def perform_function(sentence): # print(sentence) output = "" g_string = (" SIGMA -> DELTA\n" " DELTA -> S P C|S P C A|S P A | S P \n" " A -> Pre Comp \n" " S -> h |m h\n" " C -> m h|h\n" " P -> n l|aux l| l \n" " m -> d e| d\n" " h -> " + name_string + "\n" " l -> 'boarded'|'cooked'|'climbed'|'bought'|'gave'\n" " Pre -> 'ni'\n" " e -> 'black'\n" " d -> 'the'|'The'\n" " aux -> 'n'") gramma = CFG.fromstring(g_string) parser = nltk.ChartParser(gramma) try: ans = parser.parse(sentence.split()) output = " ".join(str(x) for x in list(ans)) except ValueError as e: # print("error : " + str(e)) output = "Error : " + str(e) return output
def generate_sources_grammar(attribute, parent, phase):
    gr = [
        Production(Nonterminal('S'), (Nonterminal('AUX1'), )),
        Production(Nonterminal('AUX1'), ('Do', Nonterminal('S1'))),
        Production(Nonterminal('S1'), ('you', Nonterminal('V1'))),
        Production(Nonterminal('V1'), ('think', Nonterminal('ART'))),
        Production(Nonterminal('ATTR'), (attribute, Nonterminal('END'))),
        Production(Nonterminal('END'), ('?', ))
    ]
    if phase == 1:
        v2 = Production(Nonterminal('V2'), ('included', Nonterminal('ATTR')))
    else:
        v2 = Production(Nonterminal('V2'), ("didn't include", Nonterminal('ATTR')))
    if parent is None:
        article = Production(Nonterminal('ART'), ('the', Nonterminal('CLS')))
        parent = Production(Nonterminal('CLS'), ('sources', Nonterminal('V2')))
    else:
        article = Production(Nonterminal('ART'), ('the', Nonterminal('PAR')))
        parent = Production(Nonterminal('PAR'), (parent, Nonterminal('V2')))
    gr.append(v2)
    gr.append(article)
    gr.append(parent)
    grammar = CFG(Nonterminal('S'), gr)
    return grammar
def context_free_grammar(): cfg = CFG.fromstring("""\ ################# Rules ################# S -> NP VP S -> PP NP VP S -> Wh Aux NP VP NP -> ProperNoun | CC ProperNoun | N | ProperNoun NP | AP N | DET NP | N PP VP -> V | V NP | Adv VP | V NP VP AP -> Adj | Adj AP PP -> P NP | P NP VP ################# Lexicons ################# N -> 'milk'| 'shoes' | 'salad' | 'kitchen' | 'midnight' | 'table' V -> 'laughs' | 'laughed' | 'drink' | 'wears' | 'serves' | 'drinks' | 'thinks' | 'wear' ProperNoun -> 'Bart' | 'Homer' | 'Lisa' Aux -> 'do' | 'does' CC -> 'and' Adj -> 'blue' | 'healthy' | 'green' DET -> 'a' | 'the' Adv -> 'always' | 'never' P -> 'in' | 'before' | 'on' | 'when' Wh -> 'when' """) cfparser = ChartParser(cfg) sents = text.splitlines() for sent in sents: parses = cfparser.parse(sent.split()) print(sent) for tree in parses: print(tree)
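# `text` above is a module-level global that is not shown; an illustrative value
# covered by the lexicon (an assumption, one sentence per line) would be:
#
#   text = "Bart laughs\nHomer drinks milk\nwhen does Lisa drink milk"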
def generate_events_grammar(attribute, parent, phase): gr = [ Production(Nonterminal('S'), (Nonterminal('AUX1'), )), Production(Nonterminal('AUX1'), ('Do', Nonterminal('S1'))), Production(Nonterminal('S1'), ('you', Nonterminal('V1'))), Production(Nonterminal('V1'), ('think', Nonterminal('ART'))), Production(Nonterminal('ATTR'), (attribute, Nonterminal('END'))), Production(Nonterminal('END'), ('?', )) ] if parent is not None: art = Production(Nonterminal('ART'), ('the', Nonterminal('PAR'))) par = Production(Nonterminal('PAR'), (parent, Nonterminal('V2'))) else: art = Production(Nonterminal('ART'), ('the', Nonterminal('PAR'))) par = Production( Nonterminal('PAR'), ('events that caused the incident', Nonterminal('V2'))) if phase == 1: v2 = Production(Nonterminal('V2'), ('included', Nonterminal('ATTR'))) else: v2 = Production(Nonterminal('V2'), ('did not include', Nonterminal('ATTR'))) gr.append(art) gr.append(par) gr.append(v2) grammar = CFG(Nonterminal('S'), gr) return grammar
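# Usage sketch (assumes the question-grammar builders above, e.g.
# generate_events_grammar, are importable): each returned grammar encodes exactly
# one question, so nltk's generate() can realise it as a string.
from nltk.parse.generate import generate

def realise_question(grammar):
    tokens = next(iter(generate(grammar)))
    return ' '.join(tokens)

# e.g. realise_question(generate_events_grammar('heavy rain', None, 1)) gives
# "Do you think the events that caused the incident included heavy rain ?"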
def demo(): """ A demonstration of the recursive descent parser. """ from nltk import parse, CFG grammar = CFG.fromstring( """ S -> NP VP NP -> Det N | Det N PP VP -> V NP | V NP PP PP -> P NP NP -> 'I' N -> 'man' | 'park' | 'telescope' | 'dog' Det -> 'the' | 'a' P -> 'in' | 'with' V -> 'saw' """ ) for prod in grammar.productions(): print(prod) sent = "I saw a man in the park".split() parser = parse.RecursiveDescentParser(grammar, trace=2) for p in parser.parse(sent): print(p)
def main(): source = "./grammar.cfg" sentences = [ "skywalker sarà tuo apprendista", #tuo apprendista skywalker sarà "tu avrai novecento anni di età", # novecento anni di età tu avrai "tu hai amici lì", # amici lì tu hai "noi siamo illuminati", # illuminati noi siamo "il lato oscuro è arduo da vedere", # arduo da vedere il lato oscuro è "tu hai molto da apprendere ancora", # molto da apprendere ancora tu hai "skywalker corre veloce", # veloce Skywalker corre "il futuro di questo ragazzo è nebuloso" ] # nebuloso il futuro di questo ragazzo è with open(source, encoding='utf-8') as file: grammar = CFG.fromstring(file.read()) #print(grammar) i = 0 if grammar.is_chomsky_normal_form(): for sent in sentences: it_tree = cky(sent.split(), grammar) save_tree("it" + str(i), it_tree) it_tree.draw() if (it_tree is not None): yoda_tree = translate_it_yo(it_tree) save_tree("yo" + str(i), yoda_tree) yoda_tree.draw() i += 1 else: exit('Error: the grammar must be in Chomsky Normal Form')
def get_parser_for_grammar(input_code='program.gir', grammar_name='grammar'): terminal_rules = get_terminal_rules(read_lines(input_code)) with open(grammar_name, 'r') as f: lines = '\n'.join([x for x in f.readlines() if x[0] != '#']) lines = lines + '\n' + '\n'.join(terminal_rules) return nltk.ChartParser(CFG.fromstring(lines))
def demo(): """ A demonstration of the shift-reduce parser. """ from nltk import parse, CFG grammar = CFG.fromstring( """ S -> NP VP NP -> Det N | Det N PP VP -> V NP | V NP PP PP -> P NP NP -> 'I' N -> 'man' | 'park' | 'telescope' | 'dog' Det -> 'the' | 'a' P -> 'in' | 'with' V -> 'saw' """ ) sent = "I saw a man in the park".split() parser = parse.ShiftReduceParser(grammar, trace=2) for p in parser.parse(sent): print(p)
def generate_from_grammar(self, n, depth):
    grammar = CFG.fromstring(self.gramma)
    print("Generating for n " + n + " and depth " + depth)
    for track in generate(grammar, n=int(n), depth=int(depth)):
        self.track_array.append(' '.join(track))
        # productions
        numbers = " ".join(track)
        self.productions.append(numbers)
def restore_all(sents, mint=None, maxt=None, minh=None, maxh=None): """Get all infered grammars For all combinations of parameters `t` and `h` there may be a different grammars Grammar syntax example: S -> 'c' A 'a' B | 'b' A -> 'a' A | 'A' B -> 'b' A Parameters ---------- sents: collection of str sentences to use in restoration mint: int check up values of t starting from this value maxt: int check up values of t up to this value minh: int check up values of h starting from this value maxh: int check up values of h up to this value Returns ------- grammars : dict of str grammar strings for every valid pair of t and h """ maxlen = len(max(sents, key=len)) mint = mint if mint is not None else 1 minh = minh if minh is not None else 1 maxt = maxt if maxt is not None else maxlen maxh = maxh if maxh is not None else maxlen res = {} for t, h in itertools.product(range(mint, maxt + 1), range(minh, maxh + 1)): p = Pnet(sents) p = net_transform(p, t, h) _, g_str = net_to_grammar(p, t) g = CFG.fromstring(g_str) if all(check_grammar(g, s) for s in sents): print(f'Success with t={t}, h={h}') print(g_str, '\n') res[(t, h)] = g_str else: print(f'Fail with t={t}, h={h}') return res
def generate_name(G): grammar = CFG.fromstring(G) parser = ChartParser(grammar) gr = parser.grammar() tokens = produce(gr, gr.start()) name = ''.join(tokens) return name.title()
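# The `produce` helper used above is not shown; a minimal sketch under the
# assumption that it performs a random top-down expansion of the grammar until
# only terminals remain:
import random
from nltk.grammar import Nonterminal

def produce(grammar, symbol):
    # terminals are returned as-is; nonterminals are expanded via a random production
    if not isinstance(symbol, Nonterminal):
        return [symbol]
    production = random.choice(grammar.productions(lhs=symbol))
    tokens = []
    for sym in production.rhs():
        tokens.extend(produce(grammar, sym))
    return tokens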
def main(): parser = argparse.ArgumentParser() parser.add_argument('-u', '--upper', type=int, required=True, help='Model size upper bound') parser.add_argument('-d', '--depth', type=int, required=True, help='Maximum CFG production depth considered') parser.add_argument('-v', '--verbose', action='store_true', help='Show progress and timing') parser.add_argument('-i', '--input', type=str, required=True, help='Path to text file containing CFG specification') parser.add_argument('-e', '--expr', type=str, required=True, help='Path quantifier expressions should' ' be saved in') parser.add_argument('-b', '--bit', type=str, required=True, help='Path quantifier bitstrings should be saved in') args = parser.parse_args() upper = args.upper max_depth = args.depth verbose = args.verbose in_file = args.input expr_file = args.expr bit_file = args.bit with open(in_file, 'r') as f: grammar_str = f.read() # NLTK does not like unnecessary indentation pattern = re.compile(r'\n\s+\|') grammar_str = pattern.sub(' |', grammar_str) grammar = CFG.fromstring(grammar_str) qg = QuantifierGenerator(grammar, upper, max_depth, verbose) with open(expr_file, 'w') as f_expr: with open(bit_file, 'wb') as f_bit: for expr, q_str in qg.generate(): f_expr.write(f'{expr}\n') f_bit.write(q_str.tobytes())
def from_cfg_file(cls, path: str, **kwargs) -> "CFGrammarNode": """ :param path: path to file containing a context-free grammar :return: new Derivation tree node """ assert os.path.exists(path) with open(path) as file: str_grammar = file.read() nltk_grammar = CFG.fromstring(str_grammar) return cls(nltk_grammar.start(), nltk_grammar, **kwargs)
def chart_parse(in_file, grammar_file, out_file): text = unicode(open(in_file, 'r').read(), errors='ignore') output = open(out_file, 'w') grammar_string = unicode(open(grammar_file, 'r').read(), errors='ignore') try: grammar = CFG.fromstring(grammar_string) parser = nltk.ChartParser(grammar) sentences = nltk.sent_tokenize(text) for sentence in sentences: words = nltk.word_tokenize(sentence) tree = parser.parse(words) for item in tree: output.write(str(item)) output.write('\n') except Exception, e: message = "Error with parsing. Check the input files are correct and the grammar contains every word in the input sequence. \n----\n" + str(e) sys.stderr.write(message) sys.exit()
def someGrammaticalDilemmas(): print "page 292 8.1 Some Grammatical Dilemmas" print "=============== Linguistic Data and Unlimited Possibilities ===============" from nltk import CFG groucho_grammar = CFG.fromstring(""" S -> NP VP PP -> P NP NP -> Det N | Det N PP | 'I' VP -> V NP | VP PP Det -> 'an' | 'my' N -> 'elephant' | 'pajamas' V -> 'shot' P -> 'in' """) sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas'] parser = nltk.ChartParser(groucho_grammar) trees = parser.nbest_parse(sent) for tree in trees: print tree
def CFG_grammar(): GOAL_FIND,ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE') usr_goal = ENTITY_PLACE usr_find = GOAL_FIND VP,NP,O = nonterminals('VP,NP,O') # Build a CFG based on the symbols that generated above. grammar = CFG.fromstring(""" VP -> GOAL_FIND O ENTITY_PLACE | GOAL_FIND ENTITY_PLACE NP -> P ENTITY_PLACE | ENTITY_PLACE GOAL_FIND -> 'find' GOAL_FIND -> 'show' GOAL_FIND -> 'tell' O -> 'me' P -> 'in' ENTITY_PLACE -> 'starbucks' ENTITY_PLACE -> 'the starbucks' ENTITY_PLACE -> 'a starbucks' ENTITY_PLACE -> 'coffee bean' ENTITY_PLACE -> 'the coffee bean' ENTITY_PLACE -> 'a coffee bean' """) return grammar
# Filter each sentence and return them all. def eliminate(sentence): sents=nltk.sent_tokenize(sentence) for sent in sents: str=filter(sent) return str #Here input is the chosen option on UI. #Given IDs to each question as per NCERT Book,input will be given that chosen value. input=26 # Generate variations of a particular question based on the input and its corresponding grammar. if input==2: g=CFG.fromstring(g1) g2=CFG.fromstring(g2) rd_parser=nltk.RecursiveDescentParser(g) for sent,sent2 in zip(generate(g2,n=100),generate(g,n=100)): newsent1=' '.join(sent) newsent2=' '.join(sent2) ans1=eliminate(newsent1) ans2=eliminate(newsent2) if(ans1 == None or ans2 == None): pass else: print(ans1) print(ans2) print("Determine the length and breadth") print("\n") elif input==4:
def load_grammar( self ): s = open( self.name + '.cfg' ).read() self.grammar = CFG.fromstring(s) return
def output(request): # Validation of form if request.method == "POST": # Validation of request if 'inputURL' in request.POST: # Validation of image url imageURL = request.POST.get('inputURL') image_output = imageURL indexOfDot = imageURL.rfind(".") if indexOfDot == -1: return fail(request) # not an image URL indexOfDot += 1 extension = imageURL[indexOfDot:] if extension != 'jpg' and extension != 'jpeg' and extension != 'png': return fail(request) # not a valid image (jpg, jpeg, png) client_id = '8SkASX_SM8xc-fxMF4SdpzS_b9uew8yG0UrQp0y6' secret_id = 'EXkfCNxXeiHtnpsxn9Njui_yUpCuvcSAXzfSYjwN' clarifai_api = ClarifaiApi(client_id, secret_id) # assumes environment variables are set. return output(request, makes{image_output:'image_output', text_output:'text_output'}) result = clarifai_api.tag_image_urls(imageURL) except ApiError: #return fail(request) messages.add_message(request, messages.INFO, "ApiError") return HttpResponseRedirect('makestory/fail.html') class_list = result['results'][0]['result']['tag']['classes'] prob_list = result['results'][0]['result']['tag']['probs'] class_str = "" for i in range(0, len(class_list)): class_str += class_list[i] + " " # currently just the list of matched words text_output = class_list.__str__() # Parts of speech recognition tokens = nltk.word_tokenize(class_str) dictionary = PyDictionary() nouns = [] verbs = [] adjectives = [] otherPos = [] for word in tokens: definition = dictionary.meaning(word) # https://pypi.python.org/pypi/PyDictionary/1.3.4 assignment = definition.keys()[0] # Get the part of speech from the dictonary # assignment = tuple[1] if assignment == 'Noun': nouns.append(word) elif assignment == 'Verb': verbs.append(word) elif assignment == 'Adjective': adjectives.append(word) else: otherPos.append(word) # Create the grammar #P:prepositions, DET:articles, adverbs P = ["on","in","at","since","for","ago","before","to","past","to","until","by","in","at","on","under","below","over","above","into","from","of","on","at"] DET = ["the","a","one","some","few","a few","the few","some"] assignments = pos_tag(tokens) # tagset='universal' for ADJ, NOUN, etc. pos_tags = [] pos_words = {} for tuple in assignments: word = tuple[0] pos = tuple[1] if pos in pos_words: pos_words[pos].append(word) else: pos_words[pos] = [] pos_tags.append(pos) grammar = """ S -> NP VP PP -> P NP NP -> Det N | Det N PP VP -> V NP | VP PP Det -> 'DT' """ # N -> 'NN' # V -> 'VBZ' # P -> 'PP' # adverb is RB if 'NN' in pos_words: grammar += 'N ->' + ' | '.join(pos_words['NN']) + '\n' if 'VB' in pos_words: grammar += 'V ->' + ' | '.join(pos_words['VB']) + '\n' if 'JJ' in pos_words: grammar += 'A ->' + ' | '.join(pos_words['JJ']) + '\n' simple_grammar = CFG.fromstring(grammar) #simple_grammar.start() simple_grammar.productions() sentences = [] for sentence in generate(simple_grammar, n=10): sentences.append(' '.join(sentence)) # parser = nltk.ChartParser(simple_grammar) # tree = parser.parse(pos_tags) caption = 'this is a caption' story = 'this is the story' return render(request, 'makestory/output.html', { 'nouns_output': nouns, 'verbs_output': verbs, 'adjectives_output': adjectives, 'otherPos_output': otherPos, 'imageURL_output': imageURL, 'caption_output': caption, 'story_output': story, 'sentences_test_output': sentences, } )
# Tokenize the sentence. tokenized = word_tokenize(words) # Build the grammar for parsing. GOAL_FIND,ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE') usr_goal = ENTITY_PLACE usr_find = GOAL_FIND VP,NP,O = nonterminals('VP,NP,O') grammar = CFG.fromstring(""" VP -> GOAL_FIND O ENTITY_PLACE | GOAL_FIND ENTITY_PLACE NP -> P ENTITY_PLACE | ENTITY_PLACE GOAL_FIND -> 'find' GOAL_FIND -> 'show' GOAL_FIND -> 'tell' O -> 'me' P -> 'in' ENTITY_PLACE -> 'starbucks' ENTITY_PLACE -> 'Starbucks' ENTITY_PLACE -> 'Coffee Bean' ENTITY_PLACE -> 'Coffeebean' """) rd_parser = RecursiveDescentParser(grammar) # Parsing the sentence. parsed_words = [] for parsing in rd_parser.parse(tokenized): print(parsing) # Find GOAL and ENTITY for detect in parsing:
import nltk from nltk import CFG grammar = CFG.fromstring(""" S -> NP VP NP -> Det Noun | Noun Adj VP -> Verb NP Det -> 'el' Noun -> 'gato' | 'pescado' Verb -> 'come' Adj -> 'crudo' """) def dibujo_arbol(texto): sent = texto.split() parser = nltk.ChartParser(grammar) for tree in parser.parse(sent): print(tree) tree.draw() dibujo_arbol('el gato come pescado crudo') dibujo_arbol('gato crudo come el gato') dibujo_arbol('el pescado come gato crudo')
from nltk import CFG from nltk import parse from nltk import Tree grammar = CFG.fromstring(''' S -> WHO QP QM | WHICH Nom QP QM QP -> VP | DO NP T VP -> I | T NP | BE A | BE NP | VP AND VP NP -> P | AR Nom | Nom Nom -> AN | AN Rel AN -> N | A AN Rel -> WHO VP | NP T N -> "Ns" | "Np" I -> "Is" | "Ip" T -> "Ts" | "Tp" A -> "A" P -> "P" BE -> "BEs" | "BEp" DO -> "DOs" | "DOp" AR -> "AR" WHO -> "WHO" WHICH -> "WHICH" AND -> "AND" QM -> "?" ''') chartpsr = parse.ChartParser(grammar) def all_parses(wlist,lx): """returns all possible parse trees for all possible taggings of wlist"""
''' Generate horoscopes ''' import logging from nltk.grammar import Nonterminal from nltk import CFG from os import path import random import re HERE = path.abspath(path.dirname(__file__)) try: GRAMMAR = CFG.fromstring(open('%s/data/grammar.txt' % HERE).read()) except IOError: logging.error('Unable to load grammar file') raise IOError def get_sentence(start=None, depth=7): ''' follow the grammatical patterns to generate a random sentence ''' if not GRAMMAR: return 'Please set a GRAMMAR file' start = start if start else GRAMMAR.start() if isinstance(start, Nonterminal): productions = GRAMMAR.productions(start) if not depth: # time to break the cycle terminals = [p for p in productions if not isinstance(start, Nonterminal)] if len(terminals): production = terminals production = random.choice(productions)
''' from nltk.parse.generate import generate #, demo_grammar from nltk import CFG demo_grammar = """ S -> NP VP NP -> Det N PP -> P NP VP -> 'slept' | 'saw' NP | 'walked' PP Det -> 'the' | 'a' N -> 'man' | 'park' | 'dog' P -> 'in' | 'with' """ grammar = CFG.fromstring(demo_grammar) print(grammar) #Join words and generate based off of grammar - for n for sentence in generate(grammar, n=12): print(' '.join(sentence)) ''' Notes: Need to symbolize the grammar Have the machine process the language Need to integrate with Markov chain - file 'agiliq-markov.py' ''' for sentence in generate(grammar, depth=4): print(' '.join(sentence))
def main(): while 1 == 1 : print("Enter a statement") statement = raw_input().strip() if statement == '': continue if statement.lower() in ['bye','goodbye','tata','good-bye']: print("Good-bye, dear human") exit() userNameLoader() #loads the username tagged_arr = Viterbi(statement) tokens = word_tokenize(statement) isFile = False isDir = False #check if all of the elements are same count = 1 tag = tagged_arr[1] for i in range(2,len(tagged_arr)): if tagged_arr[i] == tag: count = count + 1 if count == len(tagged_arr)-1: n = len(tokens) for i in range(0,n): tag_temp = Viterbi(tokens[i])[1] tagged_arr[i+1] = tag_temp for i in range(0,len(tokens)): if i+2 <= len(tokens): if tokens[i] in ['folder','file','directory'] and tagged_arr[i+2] in ['VB','VBN']: tagged_arr[i+1] = 'NN' elif tokens[i] in ['folder','file','directory'] and tagged_arr[i] in ['VB','VBN']: tagged_arr[i+1]='NN' for i in range (0,len(tokens)): if tagged_arr[i+1] in ['NN','NNS','NP','VB','AN','JJ'] and tokens[i]!= 'open': for j in range(0,len(appnames)): if tokens[i].lower() in appnames[j] and tokens[i].lower() not in ['file','folder','directory','copy','videos','desktop']: tagged_arr[i+1]='AN' tokens[i] = commands[j] isFile = True break if isDirName(userName,tokens[i])==True: tagged_arr[i+1] = 'AN' isDir = True elif isFileName(userName,tokens[i])==True: tagged_arr[i+1] = 'AN' isFile = True for i in range (0,len(tokens)): if tokens[i] in verbList: tagged_arr[i+1] = 'VB' break elif tokens[i] in ['words','lines']: tagged_arr[i+1] = 'NNS' break #print(tagged_arr) grammar_string = """ S -> NPP VP S -> VP NPP -> MODAL PRONOUN | NOUN VA | APPNAME NPP -> DET FOLDER VERB NAME | FOLDER VERB NAME| FOLDER NAME | DET NAME NPP -> DET JJ FOLDER VERB NAME | JJ FOLDER VERB NAME| JJ FOLDER NAME NPP -> DET AN FOLDER VERB NAME | AN FOLDER VERB NAME| AN FOLDER NAME NPP -> DET APPNAME NPP -> BACK TONAME | DET BACK TONAME NPP -> WQUERY WQUERY -> WQL AP NOUN | WRB AP NOUN BACK -> 'background' | 'BACKGROUND' | 'Background' BACK -> 'wallpaper' | 'WALLPAPER' | 'Wallpaper' BACK -> AN TONAME -> TO FILENAME | TO DET FILENAME CPY -> DET FILENAME SOURCE DESTINATION | DET FILENAME DESTINATION SOURCE CPY -> FILENAME SOURCE DESTINATION | FILENAME DESTINATION SOURCE SOURCE -> IN SOURCER SOURCER -> DET FOLDER VBN APPNAME | DET FOLDER APPNAME | DET APPNAME SOURCER -> FOLDER VBN APPNAME | FOLDER APPNAME | APPNAME DESTINATION -> TO DESTINATIONR DESTINATIONR -> DET FOLDER VBN APPNAME | DET FOLDER APPNAME | DET APPNAME DESTINATIONR -> FOLDER VBN APPNAME | FOLDER APPNAME | APPNAME FOLDER -> 'folder'|'directory'|'file'|'Folder'|'File'|'Directory'|'FOLDER'|'FILE'|'DIRECTORY' FOLDER -> NN VP -> VERB NPP | VERB VP | ADVERB VP | VERB CPY VP -> BER RB IN PPS PPS -> DET PP | PP PP -> JJ NOUN | NOUN | FOLDER VBN DET FILENAME | FOLDER VBN FILENAME | FOLDER FILENAME | FOLDER DET FILENAME PP -> FILENAME MODAL -> MD PRONOUN -> PPSS | PPO VA -> VERB APPNAME APPNAME -> AN VERB -> VB | VBN ADVERB -> RB DET -> AT NOUN -> NN | NP | NNS FILENAME -> AN """ str = 'NAME -> ' for i in range(1,len(tagged_arr)): str+=tagged_arr[i] if i < len(tagged_arr)-1: str+=" | " str+="\n" grammar_string += str #add POS tags tl = len(tagged_arr) for i in range(1,tl): if tokens[i-1] not in ['file','folder','directory']: grammar_string+=tagged_arr[i]+" -> \'"+tokens[i-1]+"\'\n" simple_grammar = CFG.fromstring(grammar_string) #print(simple_grammar) parser = nltk.ChartParser(simple_grammar) json_str = '' ANs= [] ANJSON = [] VBs = [] VBJSON = [] NAMEs= [] NJSON = [] CCYs = [] SOURCEs = [] DESTs = [] FILENAMEs = [] 
TONAMEs = [] TONAMEFILEs = [] PPs = [] PPANs = [] WQUERY = [] OBJ = [] for tree in parser.parse(tokens): #print(tree) ANs = list(tree.subtrees(filter=lambda x: x.label()=='AN')) VBs = list(tree.subtrees(filter=lambda x: x.label()=='VERB')) NAMEs = list(tree.subtrees(filter=lambda x: x.label()=='NAME')) CCYs = list(tree.subtrees(filter=lambda x:x.label()=='CCY')) SOURCEs = list(tree.subtrees(filter=lambda x:x.label()=='SOURCER')) SOURCEs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), SOURCEs) DESTs = list(tree.subtrees(filter = lambda x:x.label()=='DESTINATIONR')) DESTs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), DESTs) FILENAMEs = list(tree.subtrees(filter = lambda x:x.label()=='FILENAME')) FILENAMEs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), FILENAMEs) TONAMEs = list(tree.subtrees(filter=lambda x:x.label()=='TONAME')) TONAMEFILEs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), TONAMEs) PPs = list(tree.subtrees(filter = lambda x:x.label()=='PP')) PPANs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), PPs) WQUERY = list(tree.subtrees(filter = lambda x:x.label()=='WQUERY')) OBJ = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='NOUN')), WQUERY) if(len(PPANs)>0): PPANs = PPANs[0][0] PPANs = tree2json(PPANs) OBJ = tree2json(OBJ[0][0]) obj = OBJ['NOUN'][0] nounArr = ['NNS','NP','NN'] for n in nounArr: if n in obj: obj = obj[n] break obj = obj[0] counter(PPANs['AN'][0],obj) for i in xrange(0,len(ANs)): ANJSON.append(tree2json(ANs[i])) for i in xrange(0,len(VBs)): VBJSON.append(tree2json(VBs[i])) for i in xrange(0,len(NAMEs)): NJSON.append(tree2json(NAMEs[i])) for i in xrange(0,len(VBs)): verbRoot = VBJSON[i]['VERB'] if 'VB' in verbRoot[0]: if verbRoot[0]['VB'][0] in ['open','close','shut','exit']: if isFile == True: actionSequence(verbRoot[0]['VB'][0],ANJSON,True) elif isDir == True: actionSequence(verbRoot[0]['VB'][0],ANJSON,False) elif verbRoot[0]['VB'][0] in ['make','create']: #if isDir == True: createSequence(verbRoot[0]['VB'][0],NJSON,str.rstrip('\n')) elif verbRoot[0]['VB'][0] in ['copy','cut','move','duplicate']: SOURCEs = tree2json(SOURCEs[0][0]) DESTs = tree2json(DESTs[0][0]) FILENAMEs = tree2json(FILENAMEs[0][0]) cutCopy(verbRoot[0]['VB'][0],FILENAMEs,SOURCEs,DESTs) elif verbRoot[0]['VB'][0] in ['change','replace']: changeWallpaper(verbRoot[0]['VB'][0],tree2json(TONAMEFILEs[0][0]))
with_blank_spaces = ' ' ############################################ ############################################ ############################################ def choose_line(some_lines):#5 return a_random.choice(#7 some_lines).lower() #5 ############################################ ############################################ choose = choose_line #5 g = G.fromstring(#7 this_is_the_grammar) #5 ############################################ ############################################ while not len(pentas):#5 for poem in generate(g, #7 start=N('five')): #5 ############################################ ############################################ pentas.append(#5 with_blank_spaces.join(poem))#7 fives = pentas #5 ############################################
regex = re.compile("(\w+\s*),(\s*\w+\s*)(,|(and))+(\s*(and)?\s*\w+)") #reg = re.compile("\((,|!|\?)\)\1") #regex2 = re.compile("\((,|!|\?)\)(\s*\w+\s*)+\1") #regex2 = re.compile("\(,\)(\s*\w+\s*)+\1") regex2 = re.compile(",(\s*\w+\s*)+,") #regex3 = re.compile("!(\s*\w+\s*)+!") #regex3 = re.compile("\((\s*\w+\s*)+\)(\s*\w+\s*)*\((,|!|\?)\)\1(\s*\w+\s*)*\2\1(\s*\w+\s*)*\2?") #regex4 = re.compile("(\s*\w+\s*)*\((\s*\w+\s*)+\)\((,|!|\?)\)(\s*\w+\s*)*\1\2(\s*\w+\s*)*\1\2?") #triple_to_dist = {} list_reg = re.compile("(\w|\s)\s*\)") grammar1 = CFG.fromstring(""" S -> NP VP PP -> P NP NP -> Det N | Det N PP | Det A N | A N | N PP | "PRP$" N | N | "PRP$" A N | A N PP | N A PP | NP CC NP | NP NP NP | NP NP CC NP VP -> V NP | VP NP | VP PP | AV V | AV V NP | V AV | V AV NP | VP PP | V | VP CC VP | VP VP VP | VP VP CC VP Det -> "DT" V -> "VBZ" | "VB" | "VBG" | "VBN" | "VBD" | "VBP" P -> "PP" | "IN" A -> "JJ" | "JJR" | "JJS" AV -> "RB" | "RBR" | "RBS" N -> "NN" | "NNS" | "NNP" | "NNPS" | "PRP" | "CD" """) parser1 = nltk.ChartParser(grammar1) grammar2 = CFG.fromstring(""" T -> S S S S -> NP VP PP -> P NP NP -> Det N | Det N PP | Det A N | A N | N PP | "PRP$" N | N | "PRP$" A N | A N PP | N A PP | NP CC NP | NP NP NP | NP NP CC NP VP -> V NP | VP NP | VP PP | AV V | AV V NP | V AV | V AV NP | VP PP | V | VP CC VP | VP VP VP | VP VP CC VP Det -> "DT" V -> "VBZ" | "VB" | "VBG" | "VBN" | "VBD" | "VBP" P -> "PP" | "IN"
#!/bin/env python3.5 from nltk import RecursiveDescentParser, pos_tag, CFG, Tree from nltk.parse.earleychart import EarleyChartParser from nltk.draw import TreeView from os import system, remove grammar1 = CFG.fromstring("""S -> NP VP PP -> P | P NP | P VP NP -> Det NP PP1 | Adj N PP1 | N PP1 | N NP PP1 PP1 -> PP PP1 | VP -> V NP PP1 | V PP1 Det -> 'DT' N -> 'NN' | 'NNS' | 'NNPS' | 'NNP' | 'PRP' | 'PRP$' V -> 'VBZ' | 'VBD' | 'VBP' | 'VBG' Adj -> 'JJ' P -> 'IN'""") grammar2 = CFG.fromstring("""S -> NP VP PP -> P | PP NP | PP VP NP -> Det NP | Adj NP | N NP | NP PP | N VP -> VP NP | VP PP | V Det -> 'DT' N -> 'NN' | 'NNS' | 'NNPS' | 'NNP' | 'PRP' | 'PRP$' V -> 'VBZ' | 'VBD' | 'VBP' | 'VBG' Adj -> 'JJ' P -> 'IN'""") grammar = grammar1 rdparser, earlyparser = RecursiveDescentParser(grammar), EarleyChartParser(grammar)
#!/bin/env python3.5 from nltk import RecursiveDescentParser, CFG, pos_tag, word_tokenize from nltk.draw.tree import TreeView from os import system, remove rdparser = RecursiveDescentParser(CFG.fromstring("""S -> NP VP PP -> P | P NP | P VP NP -> Det NP PP1 | Adj N PP1 | N PP1 | N NP PP1 PP1 -> PP PP1 | VP -> V NP PP1 | V PP1 Det -> 'DT' N -> 'NN' | 'NNS' | 'NNPS' | 'NNP' | 'PRP' | 'PRP$' V -> 'VBZ' | 'VBD' | 'VBP' | 'VBG' Adj -> 'JJ' P -> 'IN'""")) taggedsent = pos_tag(word_tokenize(''.join(c for c in input('Enter a sentence:') if c not in ':,;."'))) j = 1 for tree in rdparser.parse([x[1] for x in taggedsent]): i = iter(taggedsent) for s in tree.subtrees(): if len(s) == 1: s[0] = next(i)[0] tv = TreeView(tree) tv._size.set(18) tv.resize() tv._cframe.canvas()['scrollregion'] = (0, 0, 1000,500) tv._cframe.print_to_file('output'+str(j)+'.ps') if system('convert output'+str(j)+'.ps -alpha off output'+str(j)+'.png') != 0: print(tree) remove('output'+str(j)+'.ps') j += 1
def __init__(self, grammar): self.grammar = nltkCFG.fromstring(grammar)