Example #1
def drivel(noun):
    """ Generates drivel by shifting nouns in the description of the shifted noun,
        and prepending random alliterative adjectives.
    """
    drivel = []
    description = shift(noun)[1]
    description = description.split(";")[0]
    for sentence in parsetree(description):
        for i, w in enumerate(sentence.words):
            w, tag = w.string, w.tag
            if tag in ("VBD", "VBZ"):
                w = conjugate(w, "infinitive")
                w = conjugate(w, "past")
            if tag == "NN": # noun
                try:
                    w = shift(w)[0]
                    a = list(alliterate(w))
                    if a:
                        if i > 0 and sentence.words[i-1].tag == "JJ": # previous word is an adjective
                            drivel.pop()
                        drivel.append(choice(a))
                except:
                    pass
            drivel.append(w)
    return " ".join(drivel)
Example #2
def negate_verb_leaves(leaf_nodes):
    """
    Takes a list of leaf nodes and tries to negate them by searching for "not".

    TODO: This code could be cleaner. It's repeated all over the place.
    """
    idx = None
    for i in xrange(len(leaf_nodes)):
        leaf = leaf_nodes[i]
        # We have to call is_leaf because sometimes we have
        # non-verb nodes treated as leaves
        if is_leaf(leaf) and leaf.label() == 'RB' and leaf[0].lower() == 'not':
            idx = i
            break

    if idx is not None:
        negative = leaf_nodes
        positive = leaf_nodes[:idx] + leaf_nodes[idx+1:]
    else:
        positive = leaf_nodes
        if len(leaf_nodes) == 1 and conjugate(leaf_nodes[0][0], 'VB') != 'be':
            pos = leaf_nodes[0].label()
            negative = [
                Tree(pos, [conjugate('do', pos)]),
                Tree('RB', ['not']),
                Tree('VB', [conjugate(leaf_nodes[0][0], 'VB')])
            ]
        else:
            negative = [leaf_nodes[0], Tree('RB', ['not'])] + leaf_nodes[1:]

    return (positive, negative)
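A minimal usage sketch, assuming nltk's Tree, pattern.en's conjugate, a trivial stand-in for the undefined is_leaf() helper, and (as the snippet itself presumes) that pattern.en accepts Penn tags like 'VB' and 'VBZ' as tense aliases:

from nltk import Tree
from pattern.en import conjugate

def is_leaf(t):
    # hypothetical stand-in: a leaf here is a one-level subtree like Tree('RB', ['not'])
    return isinstance(t, Tree) and t.height() == 2

positive, negative = negate_verb_leaves([Tree('VBZ', ['runs'])])
# positive -> [Tree('VBZ', ['runs'])]
# negative -> roughly [Tree('VBZ', ['does']), Tree('RB', ['not']), Tree('VB', ['run'])]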
Example #3
File: helpers.py Project: ZacharyWFox/it
def cj(word, num):
    ''' Conjugate verb based on count '''
    if num != 1:
        word = conjugate(word, 'pl')  # Conjugate as plural
    else:
        word = conjugate(word, '3sg')  # Conjugate in 3rd-person singular

    return word
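A quick usage sketch, grounded in the conjugation table from the pattern.en test suite further down (Example #10):

from pattern.en import conjugate

print(cj('be', 1))  # 'is'  -> 3rd-person singular
print(cj('be', 3))  # 'are' -> plural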
Example #4
def testBasic():
    from pattern.en import referenced
    print referenced('hour')
    
    from pattern.en import conjugate, lemma, lexeme
    print lexeme('purr')
    print lemma('purring')
    print conjugate('purred', '3sg') # he / she / it
Example #5
def find_verb_form(original_form, original_lemma, new_lemma):
    """
    Figure out the original tense of the verb, then apply that tense to new_lemma.
    There might be more than one; keep it simple and just apply the first one.
    """
    possible_conjugations = tenses(original_form)
    if not possible_conjugations:
        # tenses() recognized no form to mirror; fall back to the bare lemma
        return new_lemma
    return conjugate(new_lemma, possible_conjugations[0])
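A hedged usage sketch; the exact result depends on what tenses() reports for the original form:

from pattern.en import conjugate, tenses

print(find_verb_form('walked', 'walk', 'run'))  # expected: 'ran', the past tense carried over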
Example #6
 def explore_syn_tree(self, word_tuple, pos=None):
     if self.check_in_words(word_tuple[0]):
         return word_tuple[0]
     if word_tuple[0] == "difficult":
         return "hard"
     if word_tuple[0] == "saturn":
         return "up goer"
     synsets = None
     posSet = False
     if pos is None:
         synsets = wn.synsets(word_tuple[0])
     else:
         synsets = wn.synsets(word_tuple[0], pos=pos)
         posSet = True
     for synset in synsets:
         matching_hypernym = self.explore_hypernyms(synset)
         if matching_hypernym is not None:
             if pos == wordnet.NOUN:
                 if self.is_plural(word_tuple[0]):
                     # The word is a plural, yo.
                     return self.inflect_engine.plural(matching_hypernym)
             if pos == wordnet.VERB:
                 pattern_tag = self.get_pattern_tense(word_tuple[1])
                 person = 1
                 if word_tuple[1] == "VBZ":
                     person = 3
                 matching_hypernym = conjugate(matching_hypernym, tense=pattern_tag, person=person, parse=True)
                 return matching_hypernym
             return matching_hypernym
         matching_hyponym = self.explore_hyponyms(synset)
         if matching_hyponym is not None:
             if pos == wordnet.NOUN:
                 if self.is_plural(word_tuple[0]):
                     # The word is a plural, yo.
                     return self.inflect_engine.plural(matching_hyponym)
             if pos == wordnet.VERB:
                 pattern_tag = self.get_pattern_tense(word_tuple[1])
                 person = 1
                 if word_tuple[1] == "VBZ":
                     person = 3
                 matching_hyponym = conjugate(matching_hyponym, tense=pattern_tag, person=person, parse=True)
                 return matching_hyponym
             return matching_hyponym
     # Fallback: pick a random corpus word with the same POS tag
     wn_pos = word_tuple[1]
     possible_solutions = set()
     for tagged_tuple in self.tagged_words:
         if wn_pos == tagged_tuple[0][1]:
             possible_solutions.add(tagged_tuple[0][0])
     return random.sample(possible_solutions, 1)[0]
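get_pattern_tense() and check_in_words() are project helpers that are not shown. A plausible minimal version of the former, mapping Penn verb tags onto pattern.en tense constants:

from pattern.en import PRESENT, PAST

def get_pattern_tense(penn_tag):
    # hypothetical stand-in: past-tense tags map to PAST, everything else to PRESENT
    return PAST if penn_tag in ('VBD', 'VBN') else PRESENT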
Example #7
def stem_word(word):
    try:
        if word.endswith("s"):
            if singularize(word) in nltk_words:
                return singularize(word)
            else:
                return word
        if word.endswith("d") or word.endswith('ing'):
            if conjugate(word) in nltk_words:
                return conjugate(word)
            else:
                return word
    except:
        return word
    return word
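nltk_words is an undefined module-level collection here; one plausible definition, assuming the NLTK words corpus is available:

import nltk
from nltk.corpus import words

nltk.download('words', quiet=True)  # one-time corpus fetch
nltk_words = set(words.words())     # hypothetical; the original definition is not shown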
Example #8
def dictionary_tag(sentence):
	client = MongoClient('127.0.0.1')
	db = client.dictionaries
	collection = db.sfu
	emots = db.emoticons
	for word in sentence:
		qu = emots.find_one({'word':word[0].encode('utf-8').strip(),'pos':'em'})
		if qu:
			word[1] = 'em'
			word.append(qu['polarity'])
			continue
		qu = word[0].encode('utf-8').strip().lower()
		qu = re.sub(":"," ",qu)
		if 'n\'t' in qu or 'not' in qu or qu == 'no':
			word[1] = 'neg'
			word.append("")
			continue
		single = None
		single = collection.find_one({'word':qu,'pos':word[1][:2].lower()})
		if single:	#word as it is
			word[1] = single['pos']
			word.append(single['polarity'])
		else: # convert to 1st-person present tense
			qu = conjugate(qu,'1sg')
			single = collection.find_one({'word':qu,'pos':word[1][:2].lower()})
			if single:
				word[1] = single['pos']
				word.append(single['polarity'])
			else:
				word.append('')
	return sentence
Example #9
    def create_description(self):
        pat = 'VB|VBD|VBZ|VBG * NN IN * NN'
        #pat = 'PRP * VB|VBD|VBZ|VBG * NN'
        phrases = search.search_out(self.source_text, pat)
        conjugated_phrases = []
        for phrase in phrases:
            words = []
            for word, pos in tag(phrase):
                if pos in ["VBZ", "VBD", "VB", "VBG"]:
                    words.append(conjugate(word, "3sg"))
                #elif pos == "NN" and random.random() < .1:
                    #words.append(self.define_word(word))
                else:
                    words.append(word)
            conjugated_phrases.append(' '.join(words))

        artifacts = list(self.artifacts)

        sentence_prefixes = ["The present invention", "The device", "The invention"]
        paragraph_prefixes = ["The present invention", "According to a beneficial embodiment, the invention", "According to another embodiment, the device", "According to a preferred embodiment, the invention", "In accordance with an alternative specific embodiment, the present invention"] 
        i = 0
        self.description = ''
        for phrase in conjugated_phrases:
            line = ""
            if i == 0:
                line = paragraph_prefixes[0] + " " + phrase
            else:
                if random.random() < .1:
                    line = "\n\n" + random.choice(paragraph_prefixes) + " " + phrase
                else:
                    line = random.choice(sentence_prefixes) + " " + phrase
            self.description += line + ". "
            i += 1
Example #10
File: test_en.py Project: daeon/pattern
 def test_conjugate(self):
     # Assert different tenses with different conjugations.
     for (v1, v2, tense) in (
       ("be", "be",    en.INFINITIVE),
       ("be", "am",    en.PRESENT_1ST_PERSON_SINGULAR),
       ("be", "are",   en.PRESENT_2ND_PERSON_SINGULAR),
       ("be", "is",    en.PRESENT_3RD_PERSON_SINGULAR),
       ("be", "are",   en.PRESENT_PLURAL),
       ("be", "being", en.PRESENT_PARTICIPLE),
       ("be", "was",   en.PAST_1ST_PERSON_SINGULAR),
       ("be", "were",  en.PAST_2ND_PERSON_SINGULAR),
       ("be", "was",   en.PAST_3RD_PERSON_SINGULAR),
       ("be", "were",  en.PAST_PLURAL),
       ("be", "were",  en.PAST),
       ("be", "been",  en.PAST_PARTICIPLE),
       ("had", "have",   "inf"),
       ("had", "have",   "1sg"),
       ("had", "have",   "2sg"),
       ("had", "has",    "3sg"),
       ("had", "have",   "pl"),
       ("had", "having", "part"),
       ("has", "had",    "1sgp"),
       ("has", "had",    "2sgp"),
       ("has", "had",    "3sgp"),
       ("has", "had",    "ppl"),
       ("has", "had",    "p"),
       ("has", "had",    "ppart"),
       ("imaginerify", "imaginerified", "3sgp")):
         self.assertEqual(en.conjugate(v1, tense), v2)
     print "pattern.en.conjugate()"
Example #11
File: model.py Project: vm/lessandmore
    def _transform_word(self, word, pos, less, more):
        """transforms a word to be less less and more more

        :param word: word to transform
        :type word: str

        :param pos: part of speech of the word
        :type pos: str

        :param less: list of 'less' words
        :type less: list

        :param more: list of 'more' words
        :type more: list

        :returns: transformed word
        :rtype: str
        """

        new_word = self._get_similar_word(word, less, more)
        new_pos = en.tag(new_word)[0][1]

        if (pos[:2] != new_pos[:2]) or word == new_word:
            return word

        # handle noun
        if pos.startswith('NN'):

            # pluralization
            if pos.endswith('S') and not new_pos.endswith('S'):
                new_word = en.pluralize(new_word)

            elif not pos.endswith('S') and new_pos.endswith('S'):
                new_word = en.singularize(new_word)

            # capitalization
            if word[0].isupper():
                new_word = new_word[0].upper() + new_word[1:]
            else:
                new_word = new_word.lower()

        # handle verb
        elif pos.startswith('VB'):

            tense, person, number = en.tenses(word)[0][:3]

            # conjugation
            conjugated = en.conjugate(new_word,
                                    tense=tense,
                                    person=person,
                                    number=number,
                                    parse=False)

            if conjugated is not None:
                new_word = conjugated

        # remove underscores for joint words
        new_word = new_word.replace('_', ' ')

        return new_word
Example #12
def verbConjugate(lemma, rel, aan):
    relAvoid = ["/r/CapableOf", "/r/PartOf", "/r/MemberOf",
                "/r/IsA", "/r/HasA", "/r/TranslationOf",
                "/r/HasProperty"]
    if rel not in relAvoid:
        s = parsetree(lemma, relations=True)
        try:
            vb = s[0].verbs[0].words[0].string
            result = lemma.replace(vb, conjugate(vb, "part"))
        except:
            result = lemma
        else:
            if vb == "to":
                result = lemma

        # if not aan:
        #     try:
        #         firstWord = s[0].chunks[0].words[0].string
        #         reconjugated = conjugate(firstWord, "part")
        #         result = lemma.replace(firstWord, reconjugated)
        #     except:
        #         result = lemma

    else:
        result = lemma
        
    return result
Example #13
def make_thesaurus(file_path):
    """
    Returns dict of counters 'thesaurus', where
    thesaurus[word] = { synonym1: 4, syn2: 8, syn3: 1, ... }
    """
    thesaurus = defaultdict(Counter)

    with open(file_path, "r") as f:
        for line in f:

            # Ignore repeated book title headers
            if _is_title(line):
                continue

            parsed = parse(line)

            for tagged_word in parsed.split()[0]:
                word = tagged_word[0].strip().lower()
                pos = tagged_word[1][0]  # get pos for word

                # Reject non-ASCII characters
                try:
                    word = word.decode("ascii")
                except (UnicodeDecodeError, UnicodeEncodeError):
                    continue

                # Reject whitespace character
                if re.match("^[\s]*$", word):
                    continue

                # Increment word count of word w
                thesaurus[word].update([word])

                # Retrieve syn = synonym[w], add to thesaurus[syn]
                for syn in wn.get_synonyms(word):
                    syn = syn.name().split(".")[0]

                    # if noun, add plural form if word is plural, else add singular
                    if pos == "N":
                        if word == pluralize(word):
                            thesaurus[pluralize(syn)].update([word])
                        else:
                            thesaurus[syn].update([word])
                    # if verb, conjugate synonyms to the right form before adding them to thes
                    elif pos == "V":
                        word_tenses = tenses(word)
                        if word_tenses:
                            thesaurus[conjugate(syn, tense=word_tenses[0][0])].update([word])
                        else:
                            thesaurus[syn].update([word])
                    else:
                        thesaurus[syn].update([word])

    # Update thesaurus with mappings, if map_file exists
    file_path = file_path.replace(config.CORPUS_FOLDER, config.MAPPING_FOLDER)
    map_file = file_path.replace(config.CORP_TAG, config.MAP_TAG)
    thesaurus = _add_mappings(map_file, thesaurus)

    return thesaurus
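The wn.get_synonyms() wrapper is project-specific and not shown; since the loop calls syn.name().split(".")[0], it presumably returns WordNet synsets (whose names look like 'dog.n.01'). A hypothetical stand-in:

from nltk.corpus import wordnet

def get_synonyms(word):
    # hypothetical: synset names like 'dog.n.01' explain the split(".")[0] above
    return wordnet.synsets(word)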
Example #14
def get_conjugations(lem):
    vforms = []
    if lemma(lem) == 'be':
        vforms = [i for i in EXCEPTIONS]
    else:
        for ta in TENSE_ASPECTS:
            c = conjugate(lemma(lem), ta)
            if c:  # conjugate() can return None for unknown forms
                vforms.append(c + '|||' + ta)
    return vforms
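TENSE_ASPECTS and EXCEPTIONS are module globals that are not shown. TENSE_ASPECTS is presumably a list of pattern.en tense aliases, and EXCEPTIONS the hand-rolled 'form|||alias' entries for the irregular 'be'; hypothetical values:

TENSE_ASPECTS = ['inf', '1sg', '2sg', '3sg', 'pl', 'part', 'p', 'ppart']
EXCEPTIONS = ['be|||inf', 'am|||1sg', 'is|||3sg', 'are|||pl', 'was|||p', 'been|||ppart']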
Example #15
  def generalize(self, words):
    # Convert to generalized sentence (not company specific)
    we = 'We__PRP'
    words[0] = we

    # Pluralize the verb using pattern library
    pl = conjugate(self.remove_pos_tag(words[1]), 'pl') + '__VBP'
    words[1] = pl
    return words
Example #16
def deconjugate_leaves(T):
    if T.height() == 1:
        return
    if T.height() == 2:
        if T.label()[0] == "V":
            T.set_label("VB")
            T[0] = conjugate(T[0], "VB")
    else:
        for i in T:
            deconjugate_leaves(i)
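Run against an NLTK parse tree, this lemmatizes every verb leaf in place, assuming (as the snippet itself does) that pattern.en accepts the Penn tag 'VB' as a tense alias:

from nltk import Tree
from pattern.en import conjugate

t = Tree.fromstring("(S (NP (NNP Mary)) (VP (VBD saw) (NP (DT the) (NN dog))))")
deconjugate_leaves(t)
print(t)  # the verb leaf is relabeled VB and lemmatized, e.g. saw -> see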
Example #17
 def uncompleteSentence(self, sentence):
     full_sentence = nltk.word_tokenize(' '.join(sentence))
     tags = nltk.pos_tag(full_sentence)
     new_sentence = []
     for i in range(len(full_sentence) - 1):
         isVerb = True
         synsets = wordnet.synsets(tags[i][0])
         for syn in synsets:
             if 'verb.' not in syn.lexname:
                 isVerb = False
                 break
         if tags[i][1] in ('NN', 'RB') and isVerb:
             new_sentence.append(conjugate(tags[i][0], 'part'))
         elif tags[i][1] == 'JJ' and isVerb:
             new_sentence.append(conjugate(tags[i][0], 'ppart'))
         else:
             new_sentence.append(tags[i][0])
     new_sentence.append(full_sentence[-1])
     return new_sentence
Example #18
 def process(wrd):
     tmp = ''
     ignore_pos = ['IN', 'RP', 'TO']
     exception_lemma = ['flatter', 'flattered']
     if tag(wrd)[0][1] in ignore_pos:
         tmp = wrd
     elif wrd in exception_lemma:
         tmp = wrd
     else:
         tmp = conjugate(wrd, tense=PAST)
     return tmp
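Expected behavior, assuming pattern.en's tag, conjugate, and PAST are imported as above:

print(process('eat'))      # 'ate'     -> shifted to past tense
print(process('to'))       # 'to'      -> POS 'TO' is in ignore_pos
print(process('flatter'))  # 'flatter' -> explicit lemma exception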
Example #19
def make_verbs(inp_fn, out_dir):
  verbs = set()
  with open(inp_fn) as f:
    for i, line in enumerate(f):
      verbs.add(conjugate(line.strip(), 'p'))
  print '\tFound', len(verbs), 'verbs'

  out_fn = out_dir + '/verbs.txt'
  with open(out_fn, 'w') as f:
    for n in verbs:
      f.write(n + '\n')
  return
Example #20
File: test_en.py Project: daeon/pattern
 def test_parse_lemma(self):
     # Assert the accuracy of the verb lemmatization algorithm.
     # Note: the accuracy is higher (95%) when measured on CELEX word forms
     # (presumably because en.inflect.VERBS has high percentage irregular verbs).
     i, n = 0, 0
     for v in en.inflect.VERBS.infinitives:
         for tense in en.inflect.VERBS.TENSES:
             if en.inflect._parse_lemma(en.conjugate(v, tense)) == v: 
                 i += 1
             n += 1
     self.assertTrue(float(i) / n > 0.88)
     print "pattern.en.inflect._parse_lemma()"
Example #21
def random_imperative(noun=None, get_related=True, verb=None, adj=None):
    if noun:
        n = get_related_or_not(noun, True, 'NN') if get_related else noun
    else:
        n = random.choice(NOUNS)
    if verb:
        v = get_related_or_not(verb, True, 'VB')
        if v is None:
            v = verb
    else:
        v = random.choice(VERBS)
    if not adj:
        adj = random.choice(ADJS) if coin_flip(0.5) else ''
    c = ''

    if coin_flip(0.7):
        n = pluralize(n)
        c = random.choice(C2)
    else:
        i = random.randint(1, 5)
        n = quantify(adj + ' ' + n, amount=i)
        adj = ''

    if coin_flip(0.25):
        a = ''
        v = conjugate(v)
    elif coin_flip(0.33):
        v = conjugate(v, 'part') # present participle
        a = random.choice(A1)
        c = random.choice(C)
    elif coin_flip(0.5):
        v = conjugate(v)
        a = random.choice(A2)
    else:
        v = conjugate(v)
        a = random.choice(A3)
    
    phrase = '{0} {1} {2} {3} {4}'.format(a, v, c, adj, n)
    phrase = phrase[1:] if phrase.startswith(' ') else phrase
    return re.sub(' +', ' ', phrase)
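coin_flip() and the word lists (NOUNS, VERBS, ADJS, A1-A3, C, C2, among others) are module globals that are not shown; coin_flip is presumably a Bernoulli draw:

import random

def coin_flip(p):
    # hypothetical stand-in: True with probability p
    return random.random() < p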
Example #22
def create_defined_template(arr):
	for x in arr:
		x = x.splitlines()[0]
		if x[2] == "P":
			for t in tenses:
				a = conjugate(x[3:], t)
				triggers[x[0]][x[:3]].append(str(a))
	
		elif x[3:].find(" ") != -1 and x[3:] not in triggers_bi:
			triggers_bi.append(x[3:])
		elif x[:3] in ["BPA", "BPE"] and x[3:] not in triggers_p:
			triggers_p.append(x[3:])
		else:
			triggers["OTHERS"].append(x[3:])
Example #23
def generate_phrase_1():
    selections = [VERBS, ADJECTIVES, NOUNS, ADVERBS, TRANSITIVE_VERBS, VERBS, ADJECTIVES, NOUNS]
    entropy = sum([log(len(item), 2) for item in selections])
    conjugations = [['part'], None, None,
                    None, [random_item_from_list([PAST, PRESENT])],
                    ['part'], None, None]
    entropy += 1
    print('%.2f bits of entropy' % entropy)
    sub_list = [random_item_from_list(item) for item in selections]
    for idx, word in enumerate(sub_list):
        if conjugations[idx]:
            sub_list[idx] = conjugate(word, *conjugations[idx])

    return ('the %s %s %s %s %s the %s %s %s' % tuple(sub_list)).replace('_', ' ')
Example #24
def generate_phrase_2():
    '''Return a phrase and its entropy (in bits) of the form
       (# adj noun) (adverb verb) (adjective noun punctuation)

    E.g.,
       17 MODERATE TRAYS At once live outed wORTH bOSSES
    '''
    selections = [ADJECTIVES, NOUNS,
                  ADVERBS, TRANSITIVE_VERBS,
                  ADJECTIVES, NOUNS, TERMINAL_PUNCTUATION]
    entropy = sum([log(len(item), 2) for item in selections])
    conjugations = [None, None,
                    None, [random_item_from_list([PAST, PRESENT]), 3, PLURAL],
                    None, None,
                    None]
    sub_list = [random_item_from_list(item) for item in selections]
    for idx, word in enumerate(sub_list):
        if conjugations[idx]:
            sub_list[idx] = conjugate(word, *conjugations[idx])
    entropy += 1

    sub_list[1] = pluralize(sub_list[1])
    sub_list[5] = pluralize(sub_list[5])

    entropy += log(997, 2)

    for idx, item in enumerate(sub_list):
        rnd = randint(4)
        if rnd == 1:
            sub_list[idx] = item.capitalize()
        if rnd == 2:
            sub_list[idx] = item.upper()
        if rnd == 3:
            sub_list[idx] = item[0] + item[1:].upper()

        entropy += 2

    phrase = ('%i %s %s %s %s %s %s%s' % tuple([randint(997) + 2] + sub_list)).replace('_', ' ')

    # Insert a random symbol into the sentence
    insert_point = randint(len(phrase) + 1)
    entropy += log(len(phrase) + 1, 2) + log(len(SYMBOLS), 2)
    phrase = phrase[:insert_point] + random_item_from_list(SYMBOLS) + phrase[insert_point:]

    insert_point = randint(len(phrase) + 1)
    entropy += log(len(phrase) + 1, 2) + log(len(SYMBOLS), 2)
    phrase = phrase[:insert_point] + random_item_from_list(SYMBOLS) + phrase[insert_point:]

    return phrase, entropy
Example #25
File: rank.py Project: muyun/dev.nlp
def _interface(sentence, edblist):
    target_words, word_pre, person_taggers, org_taggers = _Stem(sentence, edblist)
    token_list = []

    if len(word_pre) > 0:
        word_pre[0] = word_pre[0][0].upper() + word_pre[0][1:]

    for word in word_pre:
        tokens = {}
        if word.strip().lower() == person_taggers.strip().lower():
            # a person: offer pronoun substitutes
            tokens[word] = [word, "He", "She"]
        elif word.strip().lower() == org_taggers.strip().lower():
            # an organization: pick a pronoun by number
            if _isplural(org_taggers.strip().split()[-1]) or (org_taggers.strip().split()[-1] == 'they'):
                tokens[word] = [word, "They"]
            else:
                tokens[word] = [word, "It"]
        else:
            if lmtzr.lemmatize(word) not in target_words:
                token_list.append(word)
            else:
                r_sent = []
                candidates = Generate_candidates_topN(word, sentence, 19, edblist)
                for i in range(len(candidates)):
                    r_sent.append(candidates[i] + "@" + sentence.replace(word, candidates[i]))
                sub_top10 = kenlm_topn(r_sent, 9, sentence)
                if lmtzr.lemmatize(word) not in sub_top10:
                    sub_top10.insert(0, word)

                if len(tenses(word)) > 0:
                    # conjugate every candidate to match the original word's tense
                    _sub_top10 = []
                    for w in sub_top10:
                        _sub_top10.append(conjugate(w, tenses(word)[0][0], 3))
                    tokens[word] = _sub_top10
                else:
                    tokens[word] = sub_top10

        if tokens:
            token_list.append(tokens)

    return token_list
Example #26
def verb_extract(child_speech):
	tokenized = [nltk.word_tokenize(line) for line in child_speech]  
	tokenized = list(itertools.chain.from_iterable(tokenized))
	tokens = [token for token in tokenized if re.match("^[A-Za-z.]*$", token)]
	verbs = []
	if TAGGER == "senna" or TAGGER == "nltk":
		tokens = ' '.join(tokens).split('.')
		tagged = [[verb.lower() for (verb,POS) in liste if POS.startswith('VB')] for \
		liste in [nltk.pos_tag(nltk.word_tokenize(token)) for token in tokens]]
		verbs = filter(None, tagged)    
		verbs = [item for sublist in verbs for item in sublist]	
	elif TAGGER == "spacy":
		tokens = ' '.join(tokens)
		tokens = TAG(unicode(tokens), entity=False)
		tokens = zip(tokens, [str(tok.tag_) for tok in tokens])
		verbs = [str(verb).lower() for (verb, pos) in tokens if pos.startswith('VB')]        
	verbs = [(verb, STEM(verb).encode("utf8")) for verb in verbs]
	verbs = [(verb, stem) for (verb, stem) in verbs if not (verb in EXCLUSIONS or \
		not conjugate(stem, tense="infinitive") or \
		is_no_change(stem) or \
		conjugate(stem, tense="infinitive") in LIGHT or \
		not in_vocabulary(conjugate(stem, tense="infinitive")) or \
		conjugate(stem, tense="past").endswith("ed"))]
	return verbs
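TAG, STEM, EXCLUSIONS, LIGHT, and in_vocabulary() are configured elsewhere; STEM is presumably any string-to-string lemmatizer, for example:

from pattern.en import lemma

STEM = lemma  # hypothetical: the original stemmer is not shown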
Example #27
	def tagLemma(self, word_old):
		#print tag(word_old)
		for word, pos in tag(word_old): 
			if pos=="NNS": #plurales
				x = singularize(word)
			elif pos in ["VB","VBG","VBZ","VBP","VBD","VBN","MD"]: # verbos a infinitivo 
				x = conjugate(word, INFINITIVE)
				#To-Do: fix this
				if x: # a veces da error al conjugar
					x = x
				else:
					x = word
			else:
				x = word  
		return x
Example #28
File: faq.py Project: VRDate/twss
def processAction(statement,database_name = DATABASE_NAME):
  #raise Exception(statement)
  match = re.search(r"what happens (?:(?:if)|(?:when)) (?:the)? ([\s\w]+) ([\s\w]+?) ([\s\w]+)\??",statement)
  #raise Exception(match.group(0))
  if match:
    # need to search action table for 
    subj = match.group(1)
    verb = match.group(2)
    verb = conjugate(verb,tense='infinitive')
    obj = match.group(3)
    result = queryTable("actions",{"origin":subj,"ident":verb,"target":obj},database_name)
    if result is None:
      return "Sorry, I don't know what happens when " + subj + " " + verb + " " + obj
    result = queryTable("reactions",{"origin":obj,"action":verb},database_name)
    (table,thing) = findTableContainingEntityWithIdentOrName(obj, database_name)
    return thing[0] + " says " + result['name']
Example #29
def add_qualifier(phrase):
	n = random.choice(NOUNS)
	v = random.choice(VERBS)

	if coin_flip(0.5):
		a = 'cannot'
	else:
		a = 'can'

	if coin_flip(0.5):
		b = 'of'
	else:
		b = 'for the'

	n = pluralize(n)
	v = conjugate(v)

	qual = '{0} you {1} {2}'.format(n, a, v)
	return '{0} {1} {2}'.format(phrase, b, qual)
Example #30
def verbConjugate(lemma, rel, aan):
    relAvoid = ["/r/CapableOf", "/r/PartOf", "/r/MemberOf",
                "/r/IsA", "/r/HasA", "/r/TranslationOf",
                "/r/HasProperty"]
    if rel not in relAvoid:
        s = parsetree(lemma, relations=True)


        try:
            vb = s[0].verbs[0].words[0].string
            result = lemma.replace(vb, conjugate(vb, "part"))
        except:
            result = lemma
        else:
            if vb in ["to", "can"]:
                result = lemma

        # if not aan:
        #     try:
        #         firstWord = s[0].chunks[0].words[0].string
        #         reconjugated = conjugate(firstWord, "part")
        #         result = lemma.replace(firstWord, reconjugated)
        #     except:
        #         result = lemma

    else:
        result = lemma

    # NEW PART TO ADD ARTICLES TO BARE NOUN PHRASES FROM CONCEPTNET
    # try:
    #     nns = s[0].subjects + s[0].objects
    # except:
    #     pass
    # else:
    #     if nns:
    #         for nn in nns:
    #             if not startsWithCheck(nn.string, ['a', 'an', 'the', 'your', 'his', 'her', 'its']):
    #                 result = result.replace(nn.string, a_or_an(nn.string))
    #     else:
    #         pass
        
    return result
Example #31
"""Transformations for extracting data."""

from pattern.en import (conjugate, PRESENT, INDICATIVE, PROGRESSIVE)
import spacy

from . import base

# TODO: remove the following lines once this issue:
# https://github.com/clips/pattern/issues/243, is resolved.
try:
    conjugate('give')
except:
    pass


class GerundifyingTransformer(base.LoggedCallable):
    """Transform the input into a gerund phrase."""

    _nlp = spacy.load('en', disable=['ner'])

    @classmethod
    def _is_root(cls, token: spacy.tokens.token.Token) -> bool:
        return token.dep_ == 'ROOT' and token.pos_ == 'VERB'

    @classmethod
    def _is_rootlike(cls, token: spacy.tokens.token.Token) -> bool:
        return (cls._is_root(token)
                or any(cls._is_root(c) for c in token.conjuncts))

    @classmethod
    def _conjugate(cls, text: str) -> str:
Example #32
def toPresent(verb):
    return conjugate(verb, PRESENT)
Example #33
def failure_description_ngram_detect(sentences):

    #stop_word_to_investigae = ['to','is' ,'are' , 'not'  , 'need' , 'reported' ,'seem' ,'seems' ,'appear' ,'appears']
    stop_word_to_investigae = ['is', 'are', 'not', 'to', 'cannot']

    for stop_word in stop_word_to_investigae:

        stopwords_2 = copy.deepcopy(Utility.stopwords_nltk_pattern_custom)
        if stop_word in stopwords_2:
            stopwords_2.remove(stop_word)

        phrases = Phrases(
            sentences,
            max_vocab_size=max_vocab_size,
            min_count=bigram_minimum_count_threshold,
            threshold=threshold,
            common_terms=frozenset(stopwords_2),
            delimiter=delimiter,
            progress_per=progress_per
        )  # use # as delimiter to distinguish from ~ used in previous stages

        with open(save_folder_name + '/' + stop_word + '_bigrams.txt',
                  "w") as bigram_2_file:
            c = 1
            for key in phrases.vocab.keys():
                #if key not in Utility.stopwords:
                if key not in Utility.stopwords_nltk_pattern_custom:
                    flag = True
                    a = key.decode()
                    a = a.split("#")
                    if len(a) > 1:
                        if stop_word not in a:  # or ('not' not in a and  'be' not in a) :
                            flag = False

                        if a[0] != stop_word:  #only look for n-grams starting with the stop-word
                            flag = False

                        if stop_word == 'to' and 'be' not in a:  # if stop_word is 'to', only look for bigrams that also contain 'be'
                            flag = False

                        for w in a[
                                1:]:  # go through the rest of the list, and see if positional word are there
                            if w in Utility.List_of_positional_word:
                                flag = False

                        if flag:
                            # aspell_checker is the original n grams delimited by #
                            if stop_word == 'to':
                                last_word = a[-1]
                                conjugated_last_word = conjugate(last_word)
                                if conjugated_last_word in Utility.List_of_maintenance_verb:
                                    logger.info("action word found: " +
                                                conjugated_last_word)
                                    s = '~'.join(a)
                                    if s not in List_of_maintenance_action_ngram:
                                        List_of_maintenance_action_ngram.append(
                                            s)
                                    continue  # skip the rest so it is not written into the file

                            if stop_word == 'is' or stop_word == 'are':
                                w = a[1:]
                                ngram_without_is_are = delimiter.decode().join(
                                    w)
                                if ngram_without_is_are not in List_of_failure_description_ngram_without_is_are:
                                    List_of_failure_description_ngram_without_is_are.append(
                                        ngram_without_is_are)
                                if len(w) == 1 and w[
                                        0] not in List_of_failure_description_single_word:
                                    List_of_failure_description_single_word.append(
                                        w[0])

                            s = key.decode()
                            print('{0}\t\t{1:<30}\t\t{2:<10}'.format(
                                c, s, phrases.vocab[key]),
                                  file=bigram_2_file)
                            c += 1

    with open(
            "./Input_Output_Folder/Failure_Description/List_of_failure_description_ngram_without_is_are.txt",
            "w") as words_file:
        for index_no, w in enumerate(
                List_of_failure_description_ngram_without_is_are):
            print('{0}\t\t{1:<10}'.format(index_no, w), file=words_file)

    with open(
            "./Input_Output_Folder/Failure_Description/List_of_failure_description_single_word.txt",
            "w") as words_file:
        for index_no, w in enumerate(
                sorted(List_of_failure_description_single_word)):
            print('{0}\t\t{1:<10}\t\t{2:<10}'.format(
                index_no, w, normalized_token_freq_dict[w]),
                  file=words_file)

    with open(
            "./Input_Output_Folder/Failure_Description/List_of_maintenance_action_ngram.txt",
            "w") as words_file:
        for index_no, w in enumerate(sorted(List_of_maintenance_action_ngram)):
            print('{0}\t\t{1:<10}'.format(index_no, w), file=words_file)
Example #34
	def conjugateVerb(self, verb, tense):
		conjugated = conjugate(verb, tense)
		return conjugated
Example #35
def extractFeaturesAndWriteBio(READ_PATH, file_type):

    global ALL_poems, bio, cnt

    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:

            num_of_files = len(files) - 1  # deduct the DS_store
            #print (num_of_files,'readDirectory',READ_PATH)

            if file_type in file and 'readme' not in file:

                # ID
                id = file.split(".")[0]
                print "\nID:", id.split("_")[1]

                filenames.append(id)
                cnt += 1

                # print('')
                # print('')
                # print('OPENED:',id)
                # print('')
                # print('')

                poem_replaced = ""
                replacement_word = ""
                previous_replacement_word = ""

                author = ""
                titles = ""
                title = ""
                new_title = ""

                replaced_ls = []
                new_titles_ls = []
                quit_language = 0
                oscillator = 0

                word_cnt = 0

                # if EXCEPTION is raised... do not add to html
                SKIP_bool = False

                ##########################
                # Load  POEM TEXT FILE     #
                ##########################

                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split(
                    "_")[1] + ".txt"
                print "txt_fn_path:", txt_fn_path

                if os.path.isfile(txt_fn_path) and cnt > 0:
                    txt_data = open(txt_fn_path).read()

                    # http://blog.webforefront.com/archives/2011/02/python_ascii_co.html
                    # txt_data.decode('ISO-8859-2') .decode('utf-8')
                    # unicode(txt_data)

                    author = txt_data.split("****!****")[0].strip(' \t\n\r')

                    title = txt_data.split("****!****")[1].strip(' \t\n\r')

                    bio = txt_data.split("****!****")[2]  #.strip(' \t\n\r')

                    ######  CLEAN BIO
                    # str.replace returns a new string, so reassign the result
                    bio = bio.replace("\t", "&#9;")
                    bio = bio.replace("\n", " <br>")
                    bio = bio.replace("\r", " <br>")
                    poem_replaced = bio
                    #print poem_replaced

                    ###############################
                    # REPLACE AUTHOR NAME in poem
                    ##############################
                    author_ln = author.split(" ")[-1].lstrip()
                    author_fn = author.split(" ")[:-1]
                    author = " ".join(n for n in author_fn) + author_ln
                    #
                    #poem_replaced = poem_replaced.replace(author_ln,"Jhave")

                    #######################
                    # replace BOOK TITLES
                    #######################
                    #print "TITLES"]
                    new_title = getNewTitle("title").encode('utf-8')

                    #######################
                    # fake AUTHOR
                    #######################

                    new_author = " ".join(
                        random.choice(authors).split(" ")
                        [1:-2]) + " " + random.choice(authors).split(" ")[-2]
                    #print "new AUTHOR",new_author

                    ############################
                    # replace years with another
                    ############################
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                new_num = random.randint(
                                    int(w2) - 5,
                                    int(w2) + 5)
                                #print "REPLACING #:",w2,new_num
                                poem_replaced = poem_replaced.replace(
                                    w2, str(new_num))
                                replaced_ls.append(new_num)

                    #################
                    # Load JSON     #
                    #################
                    response = loadJSONfile(READ_JSON_PATH +
                                            "poetryFoundation_" +
                                            id.split("_")[1] +
                                            "_Alchemy_JSON.txt")

                    if response != "failed":

                        if response.get('entities') is not None:
                            for idx, entity in enumerate(response['entities']):

                                #print idx
                                ce = entity['text'].replace("0xc2", " ")
                                ce = ce.replace("0xe2", "'")
                                ce = re.sub(
                                    '(' +
                                    '|'.join(import_utilities.chars.keys()) +
                                    ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')

                                try:
                                    content = ce.decode('utf-8').encode(
                                        'ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    "AAAARGGGGHHH!!!!"

                                if content in poem_replaced:

                                    ################################################
                                    # Replace similar entities from other JSON     #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(
                                        content, entity['type'])

                                    cr = re.sub(
                                        '(' + '|'.join(
                                            import_utilities.chars.keys()) +
                                        ')', import_utilities.replace_chars,
                                        replacement_entity)

                                    poem_replaced = poem_replaced.replace(
                                        content, replacement_entity)
                                    replaced_ls.append(replacement_entity)

                    ##########################
                    #   POS REPLACMENT       #
                    ##########################

                    token_tuples = nltk.word_tokenize(poem_replaced)
                    tt = nltk.pos_tag(token_tuples)

                    #################
                    #  ADJECTIVES   #
                    #################
                    for i in tt:
                        if "/i" not in i[0] and len(
                                i[0]) > 3 and i[0] != "died":
                            origw = re.sub(
                                '(' + '|'.join(import_utilities.chars.keys()) +
                                ')', import_utilities.replace_chars, i[0])
                            origw = import_utilities.strip_punctuation(origw)
                            if i[1] == 'JJ':
                                JJr = random.choice(JJ)
                                # # JJr =  re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, JJr)
                                # JJr = import_utilities.strip_punctuation(JJr)
                                JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(
                                    i[0],
                                    JJr.lstrip().lstrip())

                                if i[0].istitle():
                                    JJr = JJr.title()

                                poem_replaced = re.sub(
                                    r'\b' +
                                    import_utilities.strip_punctuation(i[0]) +
                                    r'\b', JJr, poem_replaced,
                                    1)  #poem_replaced.replace(i[0],JJr,1)
                                replaced_ls.append(JJr)
                            if i[1] == 'RB':
                                RBr = random.choice(RB)
                                RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(
                                    i[0],
                                    RBr.lstrip().lstrip())

                                if i[0].istitle():
                                    RBr = RBr.title()
                                poem_replaced = re.sub(
                                    r'\b' +
                                    import_utilities.strip_punctuation(i[0]) +
                                    r'\b', RBr, poem_replaced, 1)
                                replaced_ls.append(RBr)

                    ########################
                    # IS IT ENGLISH?       #
                    ########################
                    for line in poem_replaced.split('\n\r'):
                        if len(line) > 0:
                            if "english" not in import_utilities.get_language(
                                    line):
                                quit_language += 1
                                #print "NOT english:",quit_language,line
                            else:
                                quit_language -= 1

                    #########################
                    #   SYNSET REPLACE      #
                    #########################
                    for idx, word in enumerate(poem_replaced.split(' ')):

                        if "<br>" not in word and "&#9;" not in word and len(
                                word) > 0:

                            #########################
                            #   PRONOUN ' VERB      #
                            #########################
                            if len(word.split("'")) > 1:
                                if word.split("'")[0] in personal_pronouns:
                                    replacement_word = random.choice(
                                        personal_pronouns) + "'" + word.split(
                                            "'")[1] + ' '
                                poem_replaced = poem_replaced.replace(word, replacement_word)
                                #print "word,",word,"replacement_word:",replacement_word

                            ####################################################
                            # Replacement of OTHERs                            #
                            ####################################################

                            elif not word.lower().strip(
                                    " \n\t\r") in stopwords.words('english'):

                                # take off leading brackets, commas etc...
                                word_punct_nopunct = import_utilities.strip_punctuation_bool(
                                    word)
                                word_nopunct = word_punct_nopunct[
                                    'word'].strip(" \n\t\r")
                                word_punct = word_punct_nopunct['punct']
                                punct_bool = word_punct_nopunct['punct_bool']

                                #######################################################
                                # MAIN EXCHANGE PROCESS CALL >>>>>>>   GET THE SYNSET #
                                #######################################################
                                if word_nopunct[-4:].lower() == "here":
                                    similarterm = random.choice(
                                        import_utilities.heres)
                                else:
                                    #print "WORD:",word_nopunct
                                    if len(word_nopunct) > 3:

                                        oscillator = oscillator + 1

                                        ############################################
                                        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                                        # STYLE SWITCH..... should in future use POS
                                        # ... i.e. if noun & oscillator%3, do...
                                        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                                        ############################################
                                        # synset
                                        similarterm = import_utilities.synset_creeley(
                                            word_nopunct)
                                        #print "synset", similarterm

                                        if similarterm is not None and similarterm == word_nopunct and len(
                                                word_nopunct) > 5:
                                            RESERVOIR.sort(key=len)
                                            similarterm = RESERVOIR[
                                                idx % len(RESERVOIR)]
                                            print idx, len(
                                                RESERVOIR
                                            ), similarterm, word_nopunct, "PRE>>>>>>>>LAST CHANGE STOP: ", word, "~", similarterm

                                ############################################
                                # manually get rid of some terrible choices
                                ############################################
                                naw_terms = ["mind", "lonely"]
                                if similarterm == "ilk":
                                    ##print "like"
                                    similarterm = "like"
                                if similarterm == "Nox":
                                    ##print "like"
                                    similarterm = "nite"
                                if similarterm == "ope":
                                    ##print "doth"
                                    similarterm = "does"
                                if similarterm == "information technology":
                                    ##print "doth"
                                    similarterm = "it"
                                if similarterm == "velleity":
                                    ##print "doth"
                                    similarterm = "want"
                                if similarterm == "Crataegus laevigata":
                                    ##print "doth"
                                    similarterm = "may"
                                if similarterm == "eff":
                                    ##print "doth"
                                    similarterm = "know"
                                if similarterm == "naw":
                                    ##print "doth"
                                    similarterm = "mind"
                                #######################################
                                # abbreviations for f*****g states!   #
                                #######################################
                                if word_nopunct.upper(
                                ) in import_utilities.state_abbrev and word_nopunct.lower(
                                ) not in stopwords.words(
                                        'english') and "me," not in word:
                                    tmp = similarterm
                                    if word_nopunct == "oh":
                                        similarterm = random.choice(
                                            import_utilities.exclaims)
                                    else:

                                        similarterm = random.choice(RESERVOIR)
                                    #print word_nopunct," replaced by", tmp, "replaced with:",similarterm, "in:",line

                                ##############
                                # hyphenated #
                                ##############
                                hyp = word.split("-")
                                #print word,len(hyp)
                                if len(hyp) > 1:
                                    similarterm = ""
                                    for w in hyp:
                                        if len(w) > 2:
                                            if import_utilities.synset_creeley(
                                                    w) is not None:
                                                similarterm += import_utilities.synset_creeley(
                                                    w) + "-"
                                            else:
                                                similarterm += w + "-"
                                    similarterm = import_utilities.strip_underscore(
                                        similarterm[:-1])
                                    #print "hyphenated:",word,"replaced by: "+similarterm

                                # #########################################################
                                # # is it a TRUNCATED VERB slang as in singin or wishin   #
                                # #########################################################
                                # if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]:
                                #     similarterm = import_utilities.synset_creeley(word_nopunct+'g')
                                #     ## #print "TRUNCATED SLANG word: '"+word+"'",similarterm
                                #     interim = import_utilities.lemma(similarterm)
                                #     ## #print interim
                                #     similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1]
                                #     # # # #print word,"widx:",widx," line_pos_tags[widx][0]:",line_pos_tags[widx][0]," line_pos_tags[widx][1]:",line_pos_tags[widx][1]

                                #################
                                # SWEAR WORD    #
                                #################
                                ##print "at the garden of if:", word
                                if word_nopunct in import_utilities.curses:
                                    similarterm = random.choice(
                                        import_utilities.curses)
                                    ##print "SWEAR WORD word: '"+word+"'",similarterm

                                #print "SIMILAR:",similarterm

                                if similarterm is not None:
                                    if len(hyp) > 1:
                                        replacement_word = similarterm
                                    else:
                                        replacement_word = word.replace(
                                            word_nopunct, similarterm)
                                        replacement_word = import_utilities.strip_underscore(
                                            replacement_word)
                                        replacement_word = import_utilities.replaceNumbers(
                                            replacement_word)

                                #########################
                                # RESERVOIR_OF_WEIRDNESS  #
                                #########################

                                if word_nopunct.lower(
                                ) in import_utilities.impera:
                                    replacement_word = random.choice(
                                        import_utilities.impera)
                                    #print word,"IMPERA:",replacement_word
                                elif word_nopunct.lower(
                                ) in import_utilities.conjuncts:
                                    replacement_word = random.choice(
                                        import_utilities.conjuncts)
                                    #print word," CONJUNCTION replaced with",replacement_word
                                elif word_nopunct.lower(
                                ) in import_utilities.indef_prono:
                                    replacement_word = random.choice(
                                        import_utilities.indef_prono)
                                    #print word," INDEF_prono replaced with",replacement_word
                                elif word_nopunct.lower(
                                ) in import_utilities.prepo:
                                    replacement_word = random.choice(
                                        import_utilities.prepo)
                                    #print word," prepo replaced with",replacement_word
                                elif word_nopunct.lower(
                                ) in import_utilities.rel_prono:
                                    replacement_word = word
                                    #print word," rel_prono LEAVE alone: ",replacement_word
                                elif word_nopunct.lower()[-2:] == "ly":
                                    if import_utilities.synset_creeley(
                                            word) is not None:
                                        replacement_word = import_utilities.strip_underscore(
                                            import_utilities.synset_creeley(
                                                word))  #(word[:-2])
                                    #print word," ADVERB: ",replacement_word
                                    # if replacement_word[-2:] !="ly":
                                    #     replacement_word +="ly"

                                else:
                                    if (len(hyp) < 2 and "like" not in word_nopunct
                                            and import_utilities.singularize(word_nopunct) == import_utilities.singularize(replacement_word)
                                            and word_nopunct.lower() not in import_utilities.stopwords_ls):

                                        if (word not in RESERVOIR and quit_language < 0
                                                and import_utilities.countPunctuation(word) < 1
                                                and len(word_nopunct) > 3
                                                and not word_nopunct.istitle()):

                                            #print "ADDING",word,"to reservoir"
                                            ############################
                                            # ADDING ONLY SMALL WORDS
                                            ############################
                                            if len(word) < 7:
                                                RESERVOIR.append(word)

                                            replacement_word = random.choice(rap_mouth)  # RESERVOIR
                                            #print word_nopunct,"replaced from reservoir with", replacement_word
                                    # print "'"+word_nopunct+"'  vs RESERVOIR  replacement_word:",replacement_word

                                if quit_language > 1 and not word_nopunct.istitle():
                                    #print quit_language, "Probably foreign language: make a word salad in english"
                                    replacement_word = random.choice(rap_mouth)  # RESERVOIR
                                    #print word_nopunct,"OTHER replaced from reservoir with", replacement_word

                                ###################################################
                                # MOST REPLACEMENT occurs here...                 #
                                ###################################################
                                poem_ls = poem_replaced.split(' ')
                                idx = poem_ls.index(word)

                                # print idx,",", poem_ls[idx],",", word ,",",replacement_word
                                #print word ," --- ",previous_replacement_word,replacement_word

                                if len(word) > 3 and replacement_word.strip() == word_nopunct.strip():
                                    # try alchemy?
                                    RESERVOIR.sort(key=len)
                                    replacement_word = RESERVOIR[idx % len(RESERVOIR)]
                                    print idx, len(RESERVOIR), "LAST CHANGE STOP: ", word, "~", replacement_word

                                try:
                                    if poem_ls[idx] == word and "****" not in word and "." != word and "\n" not in word:
                                        poem_ls[idx] = replacement_word  #.encode('utf-8')
                                    poem_replaced = " ".join(poem_ls)

                                    # store this word so that conjugation can be checked
                                    previous_replacement_word = replacement_word
                                except Exception, e:
                                    #print "PENULTIMATE SKIP_bool replace FAIL",e
                                    SKIP_bool = True
                                    continue

                    ###########################################################################
                    # testing Pattern.en as parser for conjugation and article replacement    #
                    # much more robust than my hand-coded hacks                               #
                    ###########################################################################

                    # correct conjugation of participle verbs with pattern.en
                    parsed = parse(poem_replaced, tags=True)
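                    # remnants of contracted "am / is / are"; a verb right after one should be a participle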
                    pre_verbal = ["'m", "'s", "'re"]
                    for idx, p in enumerate(parsed.split(" ")):
                        tok = p.split("/")[0]
                        typ = p.split("/")[1]
                        #print idx,tok,typ
                        if tok in pre_verbal:
                            #print "pre_verbal:",tok
                            next_word = parsed.split(" ")[idx + 1].split("/")

                            # try try try
                            for ix, n in enumerate(next_word):
                                next_word[ix] = re.sub(
                                    '(' + '|'.join(import_utilities.chars.keys()) + ')',
                                    import_utilities.replace_chars, n).encode('utf-8')
                            try:
                                #print  next_word,next_word[0],next_word[1][:2]
                                # if it's a verb that follows
                                if next_word[1][:2] == "VB":
                                    before_verb = " ".join(poem_replaced.split(" ")[:idx])
                                    after_verb = " ".join(poem_replaced.split(" ")[idx + 1:])
                                    new_verb = conjugate(next_word[0], tense=PARTICIPLE, parse=True).encode('utf-8')
                                    # insert the re-conjugated verb back into the line
                                    #print "CONJUGATION needed, changing:",poem_replaced.split(" ")[idx],"to",parsed.split(" ")[idx],poem_replaced.split(" ")[idx-1]+" "+new_verb
                                    poem_replaced = before_verb + " " + new_verb + " " + after_verb
                            except Exception, e:
                                # print "INside parsed COnjugation loop",e
                                continue

                    # correct ARTICLES
                    for idx, word in enumerate(poem_replaced.split(" ")):
                        if len(word) > 0 and idx != 0 and " " not in word:
                            # A or AN
                            if poem_replaced.split(" ")[idx - 1].lower() in ("a", "an"):
                                #print word,"---",article(word)+" "+word
                                before_article = " ".join(poem_replaced.split(" ")[:idx - 1])
                                after_article = " ".join(poem_replaced.split(" ")[idx + 1:])
                                new_conj = referenced(word)
                                # capitalize if the article was title-case
                                if poem_replaced.split(" ")[idx - 1].istitle():
                                    new_conj = new_conj.split(" ")[0].title() + " " + new_conj.split(" ")[1]
                                poem_replaced = before_article + " " + new_conj + " " + after_article

                    #########################
                    #   WRITE SINGLE POEM   #
                    #########################
                    if not SKIP_bool:

                        tmp_poem = ""

                        # poem_replaced.replace("\t","&#9;")
                        # poem_replaced.replace("\n"," <br>")
                        # poem_replaced.replace("\r"," <br>")

                        HTML_poem = ""
                        for line in poem_replaced.split("\n"):
                            #print "LINE", line
                            HTML_poem += line + "<br>"

                        if len(response) > 0 and len(id.split("_")) > 1:
                            # ALL_poems = ALL_poems_intro + " ".join(i for i in ALL_poems.split("</h2>.")[0:])+"<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem

                            try:
                                ALL_poems = "<br>[ A  generated-poem based upon: <i>" + title + "</i> by <b>" + author + "</b>]<br><br><i>" + new_title + "</i><br> by <b>" + new_author + "</b><br>" + HTML_poem + ALL_poems.split(
                                    "</h2>")[1].replace("  ", "&nbsp")

                                tmp_poem = "[A generated-poem based upon: '" + title + "' by " + author + "]\n\n" + new_title + "\nby " + new_author + "\n" + poem_replaced

                                print "\n~~~\n\n" + tmp_poem
                                #print "\nORIGINAL:",bio

                                txt_fn = id.split("_")[1] + "_POEMs.txt"

                                WRITE_BIO_PATH = (DATA_DIR + "generated/POEMS/POEMS_"
                                                  + datetime.datetime.now().strftime('%Y-%m-%d_%H') + "/")
                                if not os.path.exists(WRITE_BIO_PATH):
                                    os.makedirs(WRITE_BIO_PATH)

                                txt_fn_path = WRITE_BIO_PATH + txt_fn
                                f_txt = open(txt_fn_path, 'w')
                                f_txt.write(tmp_poem)  #.encode('utf-8'))
                                f_txt.close()
                                #print "\nTXT file created at:",txt_fn_path

                                #######
                                #   write them all.... wasteful... but useful if run is interrupted....
                                ###########

                                # if cnt==1:
                                #     ALL_poems = ALL_poems_intro+ALL_poems
                                # else:
                                ALL_poems = ALL_poems_intro + ALL_poems.replace("  ", "&nbsp;")
                                ALL_poems = ALL_poems.replace("$$datetime$$",
                                                              datetime.datetime.now().strftime('%Y-%m-%d at %H:%M'))
                                ALL_poems = ALL_poems.replace("$$cnt$$", str(cnt))
                                print "cnt", cnt
                                ALL_poems = ALL_poems.replace("$$gentime$$", str(time.time() - start_time))

                                # ALL POEMS
                                txt_fn = (datetime.datetime.now().strftime('%Y-%m-%d_%H')
                                          + "_poetryFoundation_generatedPOEMS_CREELEYstyle_" + type_of_run + ".html")
                                txt_fn_path = DATA_DIR + "generated/POEMS/" + txt_fn
                                f_txt = open(txt_fn_path, 'w')
                                f_txt.write(ALL_poems + "</html>")
                                f_txt.close()
                                print "\nTXT file created at:", txt_fn_path
                            except Exception, e:
                                print "At the final LOOP", e
                                continue

                        else:
                            print "~! EMPTY response:", author

                    else:
                        cnt = cnt - 1
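
The two Pattern.en passes above (re-conjugating the verb after a contraction, then fixing a/an agreement) reduce to a small amount of glue code. The sketch below isolates them as a minimal re-implementation under stated assumptions: the helper names fix_contractions and fix_articles are illustrative rather than part of the original program, and the input is assumed to be pre-tokenized so that text.split(" ") lines up with Pattern.en's token stream.

from pattern.en import parse, conjugate, referenced, PARTICIPLE

def fix_contractions(text):
    # after "'m" / "'s" / "'re", force the following verb into its participle,
    # e.g. "i 'm walk to the store" -> "i 'm walking to the store"
    words = text.split(" ")
    tags = [p.split("/")[1] for p in parse(text).split(" ")]
    if len(tags) != len(words):
        return text  # tokenization drifted; leave the line untouched
    for i in range(len(words) - 1):
        if words[i] in ("'m", "'s", "'re") and tags[i + 1].startswith("VB"):
            words[i + 1] = conjugate(words[i + 1], tense=PARTICIPLE)
    return " ".join(words)

def fix_articles(text):
    # let referenced() choose "a" or "an" for the word after an article,
    # e.g. "it was a hour ago" -> "it was an hour ago"
    words = text.split(" ")
    out, skip = [], False
    for i, w in enumerate(words):
        if skip:
            skip = False
            continue
        if w.lower() in ("a", "an") and i + 1 < len(words):
            out.append(referenced(words[i + 1]))
            skip = True
        else:
            out.append(w)
    return " ".join(out)

print fix_contractions("i 'm walk to the store")
print fix_articles("it was a hour ago")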
示例#36
0
def _find_verbs(self, word):
    word_bigrams = [(a[0], b[0]) for a, b in self.word_tag_pairs
                    if a[0] == word.name and a[1] == 'NOUN' and b[1] == 'VERB'
                    and en.conjugate(b[0], "inf") not in ('be', 'have')]
    return self.__get_best_collocations(word, word_bigrams)
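
The en.conjugate(b[0], "inf") guard in _find_verbs works because Pattern.en collapses inflected auxiliaries to their lemma at the infinitive, so forms of "be" and "have" are dropped before collocations are scored. A quick check of that behavior (a sketch assuming pattern is installed and imported as in the snippet):

from pattern import en

print en.conjugate("is", "inf")   # 'be'   -> filtered out
print en.conjugate("has", "inf")  # 'have' -> filtered out
print en.conjugate("ran", "inf")  # 'run'  -> kept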
示例#37
0
def change_tense_fixed(text, tense):
    if text == 'leave' and tense == PAST:
        return 'left'
    if text == 'quit' and tense == PAST:
        return 'quit'
    return conjugate(text, tense)
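
A short usage sketch of the wrapper above, assuming conjugate and PAST come from pattern.en as the surrounding code implies; the special cases presumably cover irregular pasts the underlying conjugate() gets wrong:

from pattern.en import conjugate, PAST

print change_tense_fixed('leave', PAST)  # 'left'   (special-cased)
print change_tense_fixed('quit', PAST)   # 'quit'   (special-cased)
print change_tense_fixed('walk', PAST)   # 'walked' (falls through to conjugate)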