def handleBotInfo(sentence):
    name = [
        "Optimus... ah no, Optimist Prime :D",
        "I.am.the.legendary.Optimist.Prime B-)",
        "The most Optimist Prime! B-)",
        "You knew already *tsk tsk*"
    ]
    creator = [
        "It's a mystery :O",
        "Are you optimist enough to know? ;)",
        "You are among the few who I tell: All I know about my creator is the initials HT :)",
        "It remains a mystery to me even :(",
        "It was erased from my memory from the start :("
    ]
    m = search('what *+ your name', sentence)
    if len(m) > 0:
        return oneOf(name)
    m = search('VP+ *+ your name', sentence)
    if len(m) > 0:
        return oneOf(name)
    m = search('who *+ your creator|dad|mom|father|mother|papa|mama|daddy|mommy', sentence)
    if len(m) > 0:
        return oneOf(creator)
    m = search('VP+ *+ your creator|dad|mom|father|mother|papa|mama|daddy|mommy', sentence)
    if len(m) > 0:
        return oneOf(creator)
    m = search('who *+ creates|created|gave_birth *+ you', sentence)
    if len(m) > 0:
        return oneOf(creator)
    return "Can you guess? ;)"
def _extract_reporters(self):
    """ Extract the reporters and entities from those sentences of the text
        where a reported speech verb is used.
    """
    # search for those sentences with reported speech verbs
    sentences = [s for s in self.__tree if search('RPTVRB|según', s)]
    for s in sentences:
        s_str = s.string
        sent_nlp = self.nlp(s_str)
        verb = search('RPTVRB|según', s)[0].string
        shortest_dist = np.inf
        shortest_word = []
        for ent in sent_nlp.ents:
            # calculate distance
            dist = self._get_distance(verb, ent.text, s_str)
            # store all proper nouns in entities
            word = Word(s, ent.text, tag=None, index=s.id)
            self.__entities.append(word)
            # PER and ORG type entities closest to a reporter verb
            if ent.label_ in ["PER", "ORG"] and abs(dist) < shortest_dist:
                word = Word(s, ent.text, tag='NNP', index=s.id)
                shortest_dist = abs(dist)
                shortest_word = word
        # keep the closest entity only if it is within the allowed distance
        if shortest_word and shortest_dist < self._max_dist:
            self.__reporters.append(shortest_word)
def isAskingBotInformation(sentence):
    m = search('what *+ your name', sentence)
    if len(m) > 0:
        return True
    m = search('VP+ *+ your name', sentence)
    if len(m) > 0:
        return True
    m = search('who *+ your creator|dad|mom|father|mother|papa|mama|daddy|mommy', sentence)
    if len(m) > 0:
        return True
    m = search('VP+ *+ your creator|dad|mom|father|mother', sentence)
    if len(m) > 0:
        return True
    m = search('who made|are|created|create|wrote|gave_birth|built you', sentence)
    if len(m) > 0:
        return True
    return False
def re_search(text, search_string, strict=False):
    tree = parsetree(text, lemmata=True)
    if strict:
        results = search(search_string, tree, STRICT)
    else:
        results = search(search_string, tree)
    return results
def process(statement, database_name=DATABASE_NAME):
    '''
    Allows us to create entities via statements like
    "There is a course CSCI4702 called Mobile Programming"
    and modify entities with statements like
    "CSCI4702 has a start date of Jan 31st 2013"

    already encountering a statement like "There is a game engine Unity3d" gives us trouble
    seems like we need named entity recognition to be able to extract types like that ... or perhaps
    rely on capitalization, which doesn't really work for things like CTO as a category of items, hmm

    >>> sent = "There is a game engine Unreal Engine".split()
    >>> print nltk.ne_chunk(nltk.pos_tag(sent))
    '''
    # this runs real fast, but it doesn't quite get the NN/NNP combination I hoped for
    # from "There is a game engine Unity3D" - although it does now with light=True setting,
    # but now it doesn't get the NNP in "There is a game engine Source"
    s = parse(statement, relations=True, lemmata=True, light=True)
    s = split(s)
    #result = search('There be DT NN+ (DT) (RB) (JJ) NNP+ (call) (DT) (RB) (JJ) (NNPS|NNP)+', s)
    s, result = extract(statement)
    if result:
        #try:
        noun = search('(NN)+', s)[0].string
        table = pluralize(noun.replace(' ', '_'))
        # this pulls in adjectives, but there's supposed to be a better fix coming
        result = search('(JJ|NNPS|NNP)+', s)
        ident = result[0].string
        name = result[1].string if len(result) > 1 else ident
        #raise Exception(table+"; "+ident+"; "+name)
        return newTable(table, ident, name, database_name)
        #except:
        #    return regexMatch(statement, database_name)
    else:
        return regexMatch(statement, database_name)
def isGetNews(sentence):
    m = search('{VP} {VBG+? JJ+?} {news | information} about|on|regarding { *+ }', sentence)
    if len(m) > 0:
        if m[0].group(1).string.lower() in ['look', 'get', 'find', 'tell', 'show', 'fetch', 'search']:
            return True
    # Solve special case when "Get" at the beginning of sentence is recognized as
    # a proper noun
    m = search('get|find|look *+ news|information about|on|regarding', sentence)
    if len(m) > 0:
        return True
    return False
def re_search(text, search_string, strict=False):
    try:
        from pattern.search import STRICT, search
        from pattern.en import parsetree
    except ImportError:
        print('Please install pattern: pip install https://github.com/clips/pattern/archive/development.zip')
        sys.exit()
    tree = parsetree(text, lemmata=True)
    if strict:
        results = search(search_string, tree, STRICT)
    else:
        results = search(search_string, tree)
    return results
def isYelp(sentence):
    verbs = findVerb(sentence)
    noun_phrases = findNounPhrase(sentence)
    # If match key verbs
    yelpVerbs = ['eat', 'drink', 'find', 'display', 'get']
    for verb in verbs:
        if verb.lower() in yelpVerbs:
            # a news request with a fetch verb is not a Yelp query
            # (but "news stand"/"newsstand" is a real place, so it stays allowed)
            if ("news" in noun_phrases or "information" in noun_phrases) \
                    and "news stand" not in noun_phrases \
                    and "newsstand" not in noun_phrases:
                return False
            yelpNouns = ['restaurant', 'food', 'drink', 'shop', 'store', 'bar', 'pub']
            for noun in yelpNouns:
                if noun in noun_phrases:
                    return True
    # If match question/command structure
    # "is there" + noun phrase
    if ("is there" in sentence.string or "are there" in sentence.string) \
            and noun_phrases != "":
        return True
    # noun phrase + "near by"
    nearby = nearBy(sentence)
    if noun_phrases != "" and nearby:
        return True
    # Sometimes Speech to Text misunderstands "find" as "fine"
    m = search('{fine|find|get|show|search} { *+ }', sentence)
    print m
    if len(m) > 0:
        return True
    return False
def find_all_matches_by_ziyu(text, the_pattern):
    tree = parsetree(text, lemmata=True)
    results = search(the_pattern, tree, STRICT)
    output = []
    for match in results:
        word_list = []
        for word in match:
            word_list.append(word.string)
        sentence = " ".join(word_list)
        output.append(sentence)
    # gen_num = 0
    # if len(output) > 0 and len(output) < 2:
    #     gen_num = 1
    # elif len(output) >= 2:
    #     gen_num = 2
    # random_number = []
    # while len(random_number) != gen_num:
    #     r = random.randint(0, len(output))
    #     if r not in random_number:
    #         random_number.append(r)
    # final_output = []
    # if len(output) > 0:
    #     print "have OUTPUT"
    #     print random_number
    #     for i in range(gen_num):
    #         print i
    #         final_output.append(output[random_number[i]])
    return output
def _extract_reporters(self):
    """ Extract the reporters and entities from those sentences of the text
        where a reported speech verb is used.
    """
    # search for those sentences with reported speech verbs
    sentences = [s for s in self.__tree if search('RPTVRB|según', s)]
    # search for proper nouns that are not locations
    pattern = Pattern.fromstring('!LOCATION|NNP+', STRICT, taxonomy=TAXONOMY)
    for s in sentences:
        matches = pattern.search(s)
        for m in matches:
            for w in m.words:
                # chunks with roles (SBJ, OBJ) connected to a reporter verb
                if (w.chunk.role is not None) and (w.chunk.verb.head.lemma in taxonomy):
                    if self._is_composed_noun(w):
                        self.__reporters.append(w.previous())
                    self.__reporters.append(w)
                # proper nouns not spotlighted as reported
                else:
                    if self._is_composed_noun(w):
                        self.__entities.append(w.previous())
                    self.__entities.append(w)
def getQueries(self):
    queries = []
    WP = 'who|what|when|where|why|how|which'
    patterns = [
        # Some verbs are mislabeled as nouns
        # When *+ is used next to NP, it swallows parts of the NP
        # Because of this, using {JJ|NN*+} to capture NPs in some cases
        # [NUMS] -> rearrange captured groups in order of NUMS
        # [(x, y)] -> conjugates x into the tense of y
        #   ex: [1, (2, 3)] -> "(First Group) (Second Group conjugated to tense of Third Group)"
        (WP + ' {be} {NP}',
         "queries.append((self.joinGroups(match[0], [2, 1]), 1, 'R'))"),
        (WP + ' {be} {NP} {VB*|NN}',
         "queries.append((self.joinGroups(match[0], [2, 1, 3]), 3, 'R'))"),
        (WP + ' {be} {NP} {VB*|NN} {*+}',
         "queries.append((self.joinGroups(match[0], [4, 2, 1, 3]), 4, 'R'))"),
        (WP + ' {do} {NP} {VB*|NN}',
         "queries.append((self.joinGroups(match[0], [2, (1, 3)]), 5, 'R'))"),
        (WP + ' {do} {NP} {VB*|NN} {*+}',
         "queries.append((self.joinGroups(match[0], [4, 2, (1, 3)]), 5, 'R'))"),
        (WP + ' {NP} {VB*|NN} {*+}',
         "queries.append((self.joinGroups(match[0], [1, 3, 2]), 3, 'R'))"),
        (WP + ' {VB*|NN} {JJ|NN*+} {*+}',
         "queries.append((self.joinGroups(match[0], [3, 2, 1]), 2, 'R'))"),
        (WP + ' {NP} {VB*|NN} {*+}',
         "queries.append((self.joinGroups(match[0], [2, 1, 3]), 4, 'L'))"),
        (WP + ' {VB*|NN} {JJ|NN*+} {*+}',
         "queries.append((self.joinGroups(match[0], [1, 2, 3]), 4, 'L'))")
    ]
    t = parsetree(self._q.strip('?'), lemmata=True)
    for p, c in patterns:
        match = search(p, t)
        if match:
            exec c
    return queries + [(self.getKeyWords(t), 1, 'A')] + [(self._q, 2, 'A')]
def compare_visualization(product_sku, compare_phrase):
    all_reviews = ReviewInfo.objects.all().filter(sku=product_sku)
    g = Graph()
    count = 0.0
    for e in all_reviews:
        s = e.comment.lower()
        s = plaintext(s)
        s = parsetree(s)
        #p = '{NP} (VP) faster than {NP}'
        p = '{NP} (VP) ' + compare_phrase + ' {NP}'
        for m in search(p, s):
            x = m.group(1).string  # NP left
            y = m.group(2).string  # NP right
            if x not in g:
                g.add_node(x)
            if y not in g:
                g.add_node(y)
            g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A
        count += 1.0
        print count / len(all_reviews), '\r'
    if len(g) > 0:
        g = g.split()[0]  # Largest subgraph.
        for n in g.sorted()[:80]:  # Sort by Node.weight.
            n.fill = (0, 0.5, 1, 0.75 * n.weight)
        g.export('static/compare_visualization', directed=True, weighted=2.0)
        return True
    else:
        return False
def get_pattern_data(search_param):
    twitter = Twitter(language='en')
    for tweet in twitter.search(search_param, cached=True):
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))
    g = Graph()
    for i in range(10):
        for result in twitter.search(search_param, start=i + 1, count=50):
            s = result.text.lower()
            s = plaintext(s)
            s = parsetree(s)
            p = '{NP} (VP) ' + search_param + ' {NP}'
            for m in search(p, s):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A
    #if len(g) > 0:
    #    g = g.split()[0]  # Largest subgraph.
    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)
    g.export('data', directed=False, weighted=0.6)
def basicExtract(statement):
    #s = Sentence(parse(statement, relations=True, lemmata=True, light=True))
    #p = Pattern.fromstring('(DT) (RB) (JJ) NN+')
    s = Sentence(parse(statement, lemmata=True))
    m = search("There be DT {JJ? NN}", s)
    return m
def get_noun_phrases_fr_text(text_parsetree, print_output=0, phrases_num_limit=5, stopword_file=''):
    """ Method to return noun phrases in target text with duplicates.
        The phrases will be noun phrases, i.e. NP chunks.
        Has built-in stop words --> check the folder address for this.
        Args:
            text_parsetree (pattern.text.tree.Text): parsed tree of original text
        Kwargs:
            print_output (bool): 1 - print the results else do not print.
            phrases_num_limit (int): return the max number of phrases. If 0, return all.
        Returns:
            (list): list of the found phrases.
    """
    target_search_str = 'NP'  # noun phrases
    target_search = search(target_search_str, text_parsetree)  # only apply if the keyword is top freq: 'JJ?+ NN NN|NNP|NNS+'
    target_word_list = []
    for n in target_search:
        if print_output:
            print retrieve_string(n)
        target_word_list.append(retrieve_string(n))
    ## exclude the stop words.
    if stopword_file:
        with open(stopword_file, 'r') as f:
            stopword_list = f.read()
            stopword_list = stopword_list.split('\n')
        target_word_list = [n for n in target_word_list if n.lower() not in stopword_list]
    if len(target_word_list) >= phrases_num_limit and phrases_num_limit > 0:
        return target_word_list[:phrases_num_limit]
    else:
        return target_word_list
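# A minimal usage sketch for get_noun_phrases_fr_text() above. The sample sentence and the
# demo function name are hypothetical; it assumes pattern.en is installed and that the
# module's retrieve_string() helper is available alongside the function.
def demo_get_noun_phrases():
    from pattern.en import parsetree
    tree = parsetree("The quick brown fox jumps over the lazy dog.", lemmata=True)
    # Return at most three NP chunk strings found in the parse tree.
    return get_noun_phrases_fr_text(tree, phrases_num_limit=3)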
def get_phrases_contain_keyword(text_parsetree, keyword, print_output=0, phrases_num_limit=5):
    """ Method to return phrases in target text containing the keyword.
        The keyword is taken as a noun or NN|NP|NNS.
        The phrases will be noun phrases, i.e. NP chunks.
        Args:
            text_parsetree (pattern.text.tree.Text): parsed tree of original text
            keyword (str): can be a series of words separated by | eg "cat|dog"
        Kwargs:
            print_output (bool): 1 - print the results else do not print.
            phrases_num_limit (int): return the max number of phrases. If 0, return all.
        Returns:
            (list): list of the found phrases. (duplicates removed)
    """
    ## Regular expression matching.
    ## Interested in phrases containing the target word; assume the target noun is either adj or noun.
    target_search_str = 'JJ|NN|NNP|NNS?+ ' + keyword + ' NN|NNP|NNS?+'
    target_search = search(target_search_str, text_parsetree)  # only apply if the keyword is top freq: 'JJ?+ NN NN|NNP|NNS+'
    target_word_list = []
    for n in target_search:
        if print_output:
            print retrieve_string(n)
        target_word_list.append(retrieve_string(n))
    target_word_list_rm_duplicates = rm_duplicate_keywords(target_word_list)
    if len(target_word_list_rm_duplicates) >= phrases_num_limit and phrases_num_limit > 0:
        return target_word_list_rm_duplicates[:phrases_num_limit]
    else:
        return target_word_list_rm_duplicates
def htmlSearch(self, html, url):
    logger.debug(u"htmlSearch URL : %s" % url)
    logger.debug(u"html : %s" % html[:20])
    s = html.lower()
    s = plaintext(s)
    s = parsetree(s)
    # self.logSentences(s)
    # Execute a Regular Expression Search
    p = r'(NN)+'
    q = search(p, s)
    # self.logPOS(q)
    # Iterate over all the words in the POS
    logger.debug(u" q.Length=%d" % len(q))
    logger.debug(u" q[]=%s" % q)
    self.g, self.urlConcepts, self.wordConcepts = self.addNodes(self.g, q, url,
                                                                self.urlConcepts, self.wordConcepts)
    return self.urlConcepts, self.wordConcepts
def test_search_function(self):
    # Assert search() function.
    s = Sentence(parse("Go on Bors, chop his head off!"))
    m = search.search("PRP*? NN*", s)
    self.assertEqual(m[0].string, "Bors")
    self.assertEqual(m[1].string, "his head")
    print "pattern.search.search()"
def find_causal_matches(unicode_string, causal_pattern, pattern_order):
    # Description: Searches text string and returns all cause-effect
    #   relationships based on specified pattern.
    # Inputs: unicode_string, raw text in Unicode format for Python 3
    #   causal_pattern, regex defining specific causal statement pattern
    #   pattern_order, specifying which noun phrase is cause or effect
    # Outputs: List of causal tuples [(cause, effect), ...] or empty list []

    # Initialize causal_tuple_list as empty list
    causal_tuple_list = []
    # Convert string to Pattern parsed text (with POS tags)
    t = parsetree(unicode_string, lemmata=True)
    # possible_matches is a list of all Pattern matches, given text and pattern
    possible_matches = search(causal_pattern, t, lemmata=True)
    # Add causal matches as tuples (cause, effect) to causal_tuple_list
    # Note, if possible_matches == [], there are no matches
    if possible_matches != []:
        # Extract cause-effect tuples and add to causal_tuple_list
        causal_tuple_list = extract_cause_effect_tuple(possible_matches, pattern_order)
    final_causal_tuple_list = []
    for causal_tuple in causal_tuple_list:
        if (causal_tuple[0] in unicode_string) and (causal_tuple[1] in unicode_string):
            final_causal_tuple_list.append(causal_tuple)
    return final_causal_tuple_list
def features(sentence):
    stop = nltk.corpus.stopwords.words('english')
    #ptree = parsetree(sentence, relations=True, lemmata=True)
    ptree = parsetree(sentence)
    matches = search('NP', ptree)
    phrases = []
    for match in matches:
        filtered_np = [word for word in match if word.string.lower() not in stop]
        if len(filtered_np) > 0:
            phrases.append(filtered_np)
    #for sentence in ptree:
    #    for chunk in sentence.chunks:
    #        if chunk.type == 'NP':
    #            print [(w.string, w.type) for w in chunk.words]
    sentence_sentiment = 'NEU'
    sent_result = sentiment(sentence)
    sent = sent_result[0]
    if sent > .1:
        sentence_sentiment = 'POS'
    elif sent < -.1:
        sentence_sentiment = 'NEG'
    sentence_subjectivity = 'OBJ'
    if sent_result[1] > .5:
        sentence_subjectivity = 'SUB'
    features = {}
    features['NP'] = phrases
    features['SN'] = sentence_sentiment
    features['SUB'] = sentence_subjectivity
    return features
def get_noun_phrase_fr_title(self, title):
    """ Get the NP from title.
        Use for comparing to company names to extract specific news.
    """
    t = parsetree(title, lemmata=True)
    target_search = search('NP', t)
    return target_search[0].group(0).string
def extract_verbs(tree):
    verb_matches = search('to|you {VB*}', tree)
    phrases = list()
    for match in verb_matches:
        if match.group(1)[0].type in ('VBG', 'VBZ'):
            continue
        if match.group(1)[0].string == "dream":
            continue
        phrases.append(tree[match.group(1).start:])
    return phrases
def print_feature(sentence):
    ptree = parsetree(sentence)  # , relations=True, lemmata=True
    # It matches anything from food to cat food, tasty cat food, the tasty cat food, etc.
    t = parsetree('tasty cat food')
    matches = search('DT? RB? JJ? NN+', ptree)
    for match in matches:
        print match
    print '\n'
def test_group(self):
    # Assert Match groups.
    s = Sentence(parse("the big black cat eats a tasty fish"))
    m = search.search("DT {JJ+} NN", s)
    self.assertEqual(m[0].group(1).string, "big black")
    self.assertEqual(m[1].group(1).string, "tasty")
    # Assert nested groups (and syntax with additional spaces).
    m = search.search("DT { JJ { JJ { NN }}}", s)
    self.assertEqual(m[0].group(1).string, "big black cat")
    self.assertEqual(m[0].group(2).string, "black cat")
    self.assertEqual(m[0].group(3).string, "cat")
    # Assert chunked groups.
    m = search.search("NP {VP NP}", s)
    v = m[0].group(1, chunked=True)
    self.assertEqual(v[0].string, "eats")
    self.assertEqual(v[1].string, "a tasty fish")
    print "pattern.search.Match.group()"
def extract_keyphrases_from_doc_pattern(item, key):
    # build parsetree, extract NP's
    pt = parsetree(item[key])
    noun_phrases = search('NP', pt)
    # convert np matches to unicode list
    noun_phrases = [np.string for np in noun_phrases]
    # remove ones too short, lemmatize, etc..
    cleankeys = regularise_keys(noun_phrases)
    return cleankeys
def measure_pattern_search():
    """ pattern
        JJ|NN* NN*
        DT? JJ|NN?+ NN
        DT? JJ|NN*+ NN*
    """
    global pattern_search_result  # make measure_pattern_search able to modify the value
    #print ('text_tree', text_tree)
    pattern_search_result = search(pattern_string, text_tree)
def taxonomy_normalize(sentence):
    bp_match = search('BEAUTY_PARTS', parsetree(sentence, lemmata=True))
    facial_match = search('MAKEUP', parsetree(sentence, lemmata=True))
    feet_match = search('FEET', parsetree(sentence, lemmata=True))
    body_match = search('BODY', parsetree(sentence, lemmata=True))
    matches = ''
    if len(bp_match) > 0:
        matches += 'BEAUTY_PARTS-'
    if len(facial_match) > 0:
        matches += 'MAKEUP-'
    if len(feet_match) > 0:
        matches += 'FEET-'
    if len(body_match) > 0:
        matches += 'BODY-'
    return matches
def extract_verb_phrases(tree):
    verb_phrase_matches = search('to|you {VP}', tree)
    phrases = list()
    if len(verb_phrase_matches) > 0:
        possible_matches = list()
        for match in verb_phrase_matches:
            if match.group(1)[0].string == "dream":
                continue
            phrases.append(tree[match.group(1).start:])
    return phrases
def add_keywords(self, phrase):
    sent = en.Sentence(en.parse(phrase))
    nouns = search('NN', sent)
    self.blackboard.pool.nouns.update(
        set(Word(en.singularize(n[0].string)) for n in nouns))
    adjs = search('JJ', sent)
    self.blackboard.pool.adjectives.update(
        set(Word(en.lemma(a[0].string)) for a in adjs))
    try:
        nps = search('NP', sent)
        for np in nps:
            self.blackboard.pool.epithets.update({
                Word(en.singularize(w.string), "NN"):
                    [Word(jj.string, "JJ") for jj in np if "JJ" in jj.tag]
                for w in np if "NN" in w.tag
            })
    except IndexError:
        pass
def nlp(bigram, sent):
    entity = []
    for tup in bigram:
        txt = " ".join(tup)
        #print txt
        m = search(txt, sent)
        if m:
            entity.append(txt)
            #print m
    return entity
def get_ngrams(description, lang='it'):
    """
    Analyze description and get relevant ngrams
    using an Italian POS tagger, looking for exact
    combinations of POS patterns
    """
    s = it_parsetree(description, relations=True, lemmata=True)
    if lang == "en":
        s = en_parsetree(description, relations=True, lemmata=True)
    matches = []
    ngrams = []
    for match in search("JJ NN", s):
        matches.append(match.constituents())
    for match in search("NN JJ", s):
        matches.append(match.constituents())
    for match in search("NN", s):
        matches.append(match.constituents())
    for match in matches:
        ngrams.append(" ".join([chunk.string for chunk in match]).encode("utf8"))
    return remove_uncorrect_tokens(ngrams)
def verbosInfinitivos(cadena):
    t = parsetree(cadena)
    verbos = search('VB*', t)
    #lis = verbos.match.string
    #print 'list: ', lis
    #print
    # can't I convert it to a list in one go??
    lista = []
    for match in verbos:
        lista.append((match.string, conjugate(match.string, INFINITIVE)))
    #print 'lista for: ', lista
    #print lista[3][1]
    return lista
def isAskingBotInformation(sentence):
    m = search('what *+ your name', sentence)
    if len(m) > 0:
        return True
    m = search('VP+ *+ your name', sentence)
    if len(m) > 0:
        return True
    m = search('who *+ your creator|dad|mom|father|mother|papa|mama|daddy|mommy', sentence)
    if len(m) > 0:
        return True
    m = search('VP+ *+ your creator|dad|mom|father|mother', sentence)
    if len(m) > 0:
        return True
    m = search('who made|create|wrote|built you', sentence)
    if len(m) > 0:
        return True
    return False
def extract(statement):
    s = Sentence(parse(statement, lemmata=True))
    '''c1 = Constraint.fromstring("There be DT")
    c2 = Constraint.fromstring("NN+")
    c3 = Constraint.fromstring("(DT)")
    c4 = Constraint.fromstring("(RB) (JJ) NNP+")
    c5 = Constraint.fromstring("(call) (DT)")
    c6 = Constraint.fromstring("(RB) (JJ) (NNPS|NNP)+")
    p = Pattern(sequence=[c1, c2, c3, c4, c5, c6])
    match = p.search(s)
    '''
    s = find_entities(s)
    # not sure about this "be" thing - happy to match plural (is/are)
    # but not sure about past tense ...
    match = search(MATCH_STRING_EXT, s)
    if not match:
        match = search(MATCH_STRING, s)
    #raise Exception(match)
    return s, match
def verbosInfinitivos(cadena):
    t = parsetree(cadena)
    verbos = search('VB*', t)
    print('verbos =', verbos)
    #lis = verbos.match.string
    #print ('list: ', lis)
    #print()
    # can't I convert it to a list in one go?? LAMBDA
    lista = []
    for match in verbos:
        lista.append((match.string, conjugate(match.string, INFINITIVE)))
    #print ('lista for: ', lista)
    #print (lista[3][1])
    return lista
def inflate(s):
    """ Returns an exaggerated string:
        inflate("I'm eating a burger") => "I'm eating hundreds of burgers".
    """
    # Part-of-speech tagging identifies word types in a text.
    # For example, "can" can be a noun (NN) or a verb (VB),
    # depending on the words surrounding it.
    # http://www.clips.ua.ac.be/pages/pattern-en#parser

    # A parse tree splits punctuation marks from words, tags words,
    # and constructs a nested tree of sentences that contain words.
    # http://www.clips.ua.ac.be/pages/pattern-en#tree
    t = parsetree(s)

    # We can use pattern.search to search for patterns inside a parse tree.
    # If you know what regular expressions are: this is similar,
    # only you can also search by part-of-speech tag.
    # This is very useful to retrieve syntactic structures, e.g.:
    # "any noun, optionally preceded by an adjective", or
    # "any conjugation of the verb to be".
    # http://www.clips.ua.ac.be/pages/pattern-search

    # The search pattern below means:
    # "any determiner (a, an, the), optionally followed by any adjective,
    # followed by one or more nouns".
    # The search will yield a list of matches.
    # We'll pluralize the nouns in each match, so that "burger" becomes "burgers", etc.
    # Note the curly braces {}.
    # We can retrieve the words inside it with match.group().
    for match in search("{DT} {JJ?} {NN+}", t):
        x = choice(["dozens of ", "hundreds of ", "thousands of "])
        # We'll only look at matches that start with "a" or "an".
        # This indicates an object or a thing of which many can exist.
        # If the match starts with "the", it might indicate something unique,
        # like "the capital of Nairobi". It doesn't make sense to transform
        # it into "hundreds of capitals of Nairobi".
        if match.group(1).string.lower() not in ("a", "an"):
            continue
        # Include the adjective, if any.
        if match.group(2):
            x += match.group(2).string
            x += " "
        # Pluralize the group of nouns.
        x += pluralize(match.group(3).string)
        s = s.replace(match.group(0).string, x)
    return s
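# A small usage sketch for inflate() above (assumes pattern.en is installed; the exact
# quantifier in the output varies because choice() picks one at random).
if __name__ == "__main__":
    print(inflate("I'm eating a burger"))  # e.g. "I'm eating hundreds of burgers"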
def handle_starttag(self, tag, attrs):
    for attr in attrs:
        a = attr[1]
        a = a.split('/')
        a = [stem(a[j]) for j in range(len(a))]
        a = '_'.join(str(e) for e in a)
        for i in attr:
            l = search('src', i)
            b1 = search('5g', a)
            b3 = search('imag', a)
            b4 = search('video', a)
            b5 = search('pdf', a)
            if not len(b1) == 0:
                if not len(b5) == 0:
                    if not attr[1][2:] in Pdfs[lien]:
                        Pdfs[lien].append(attr[1][2:])
                if not len(b3) == 0:
                    if not attr[1][2:] in Images[lien]:
                        Images[lien].append(attr[1][2:])
                if not len(b4) == 0:
                    if not attr[1][2:] in Videos[lien]:
                        Videos[lien].append(attr[1][2:])
            if not len(l) == 0 and not len(b1) == 0:
                if not attr[1][2:] in Images[lien]:
                    Images[lien].append(attr[1][2:])
def extractTrait(characterSentences):
    """
    Analyzes the sentences using the search module of pattern for adjectives.
    """
    print(1)
    characterTrait = defaultdict(list)
    for key, value in characterSentences.items():
        for x in value:
            #print(x)
            #t = parsetree(x)
            characterTrait[key].append(search('JJ', parsetree(str(x))))
            #print(search('JJ', parsetree(str(x))))
    return characterTrait
def learn(concept):
    """ Returns a list of properties for the given concept,
        collected from a "I think X is Y".
    """
    q = 'I think %s is *' % concept
    p = []
    g = Google(language='en', license=None)
    for i in range(10):
        for result in g.search(q, start=i, cached=True):
            m = plaintext(result.description)
            m = search(q, m)  # Use * as a wildcard.
            if m:
                p.append(m[0][-1].string)
    return [w for w in p if w in PROPERTIES]  # only handles known properties...
def test_parse_sentences(self):
    texts = [
        pattern.en.Text(pattern.en.parse("Mary had a little lamb and it was really gorgeous. None.", lemmata=True)),
        pattern.fr.Text(pattern.fr.parse("Mary avait un agneau et il etait vraiment sympa. Personne.", lemmata=True))
    ]
    nps = []
    for text in texts:
        for sentence in text:
            for match in search('NP', sentence):
                for word in match.words:
                    nps.append(word.lemma)
    self.assertEqual(nps, [u'mary', u'a', u'little', u'lamb', u'it', u'none',
                           u'mary', u'un', u'agneau', u'et', u'il', u'personne'])
def test_search():
    from pattern.search import search
    from pattern.en import parsetree
    t = parsetree('big white rabbit')
    print t
    print
    print search('JJ', t)  # all adjectives
    print search('NN', t)  # all nouns
    print search('NP', t)  # all noun phrases
def fullQuery(sentence):
    new_str = ""
    for word in sentence.words:
        if word.string in ['places', 'locations', 'spots']:
            continue
        new_word = singularize(word.string) if word.type == "NNS" else word.string
        new_str += new_word + " "
    singularized_sentence = parsetree(new_str, relations=True, lemmata=True)
    m = search('{JJ? NN+} IN {JJ? NN+}', singularized_sentence)
    query = {}
    if len(m) > 0:
        query["term"] = m[0].group(1).string
        query["location"] = m[0].group(2).string
    return query
def load_data(data_file):
    corpus_data = []
    corpus_target = []
    print "Reading data file: {}".format(data_file)
    corpus_file = open(data_file, "r")
    print "Importing data..."
    lines = []
    for line in corpus_file:
        entry = Entry()
        line_parts = line.split("\t")
        # data validity check
        assert len(line_parts) == 4
        entry.figurative = True if (line_parts[2] == "figuratively") else False
        # initial pre-process
        phrase = line_parts[1].decode('utf8').lower()
        sentences = remove_tags(line_parts[3].decode('utf8').lower())
        entry.phrase = wordpunct_tokenize(phrase)
        entry.phrase_lemma = [lemmatize(w) for w in entry.phrase]
        # clean up and parse sentence
        entry.sentences = sent_tokenize(sentences)
        entry.sentence = np.array([wordpunct_tokenize(x) for x in entry.sentences])
        #entry.pos = pos_tag(entry.sentence)
        entry.sentence = np.hstack(entry.sentence)
        entry.sentence_lemma = np.array([lemmatize(w) for w in entry.sentence])
        # find match of phrase (original strings)
        phrase_match = search(" ".join(entry.phrase_lemma), " ".join(entry.sentence_lemma))
        if len(phrase_match) > 0:
            # isolate context (remove phrase)
            context_select = np.ones(len(entry.sentence), dtype=np.bool)
            start = phrase_match[0].start
            stop = phrase_match[0].stop
            context_select[start:stop] = False
            entry.context = entry.sentence[context_select]
            entry.context_lemma = entry.sentence_lemma[context_select]
        else:
            #print u"phrase {} not found in sentence {}?".format(phrase, sentences)
            entry.context = entry.sentence
            entry.context_lemma = entry.sentence_lemma
        lines.append(entry)
    return lines
def adjectives(L):
    """Returns a list of adjectives present in input lists.

    >>> adjectives([['big', 'white', 'tall', 'dog'], ['bat', 'tall']])
    ['big', 'white', 'tall', 'tall']
    >>> adjectives([['march'], ['yes', 'i', 'know', 'its', 'almost', 'march']])
    []
    """
    adjs = []
    for l in range(len(L)):
        current_string = " ".join(L[l])
        parts_of_speech = parsetree(current_string)
        # Search the parsed string for adjectives
        for i in search("JJ", parts_of_speech):
            adjs.append(str(i.string))
    return adjs
def patternSearch(self, n=12, m=50):
    logger.info(u"patternSearch")
    proxyList = list()
    proxyList.append(u"3128")
    proxyList.append(u"206.217.138.154")
    logger.info(u"proxyList - %s" % proxyList)
    engine = Google(license=None, throttle=0.5, language=None)
    # engine = Bing(license=None, throttle=0.5, language=None)
    for i in range(n):
        logger.info(u"Search %d" % i)
        results = engine.search(self.searchTerm, start=i + 1, count=m, cached=False, proxy=proxyList)
        for r in results:
            logger.debug(u"Result=%s" % r.text)
            url = r.url
            logger.debug(u"URL=%s" % url)
            # if url[-4:] == ".com":
            #     continue
            s = r.text.lower()
            s = plaintext(s)
            s = parsetree(s)
            # self.logSentences(s)
            # Execute a Regular Expression Search
            # p = r'(NN)+ (VB)+'
            p = r'(NN)+'
            q = search(p, s)
            # logPOS(q)
            # Iterate over all the words in the POS
            logger.debug(u" q.Length=%d" % len(q))
            logger.debug(u" q[]=%s" % q)
            self.g, self.urlConcepts, self.wordConcepts = \
                self.addNodes(self.g, q, url, self.urlConcepts, self.wordConcepts)
    return self.urlConcepts, self.wordConcepts
def extract_noun_phrases(body_part_name):
    stop = nltk.corpus.stopwords.words('english')
    filename = '/Users/rsteckel/tmp/Observable_body_parts-sentences-BODYPART1.tsv'
    df = pd.read_csv(filename, sep='\t', encoding='utf-8')
    df['lemmas'] = df['themeword'].apply(lambda x: lemma(x))
    sentences = df[df['lemmas'] == body_part_name]['sentence'].tolist()
    phrases = []
    for sentence in sentences:
        ptree = parsetree(sentence)
        matches = search('NP', ptree)
        for match in matches:
            filtered_np = [word for word in match if word.string.lower() not in stop]
            if len(filtered_np) > 0:
                phrases.append((sentence, filtered_np))
    return pd.DataFrame(phrases, columns=['sentence', 'phrase'])