Пример #1
0
def extract_names(sentences, email, github_username):
    """Pick the proper noun that best matches the e-mail name or GitHub username.

    Every token tagged ``"NNP"`` in ``sentences`` is a candidate. Candidates are
    scored with ``fuzz.ratio`` (case-insensitively) against the part of
    ``email`` before the first ``@`` and, when ``github_username`` is truthy,
    against that username as well. The GitHub match wins only when its best
    score is strictly higher than the best e-mail score.

    :param sentences: Iterable of tagged sentences; each sentence is an
        iterable of chunks read as ``chunk[0]`` (word) and ``chunk[1]`` (tag).
    :param email: E-mail address; only the text before the first ``@`` is used.
    :param github_username: Optional GitHub username; ignored when falsy.
    :return: The best-matching candidate word, or ``None`` when ``sentences``
        contains no ``"NNP"`` token.
    """
    email_name = email.split('@')[0]

    names = []
    for tagged_sentence in sentences:
        for chunk in tagged_sentence:
            try:
                if chunk[1] == "NNP":
                    names.append(chunk[0])
            except (IndexError, KeyError, TypeError):
                # Chunk is too short or not indexable: not a (word, tag) pair.
                continue

    if not names:
        # Nothing to rank; previously this path crashed on ``None[0]``.
        return None

    def best_match(target):
        """Return ``(score, name)`` for the candidate closest to ``target``."""
        target = target.lower()
        scored = [(fuzz.ratio(target, name.lower()), name) for name in names]
        # max() keeps the first of several equal scores, i.e. the earliest
        # candidate, which is what a strict ">" replacement rule yields.
        return max(scored, key=lambda pair: pair[0])

    email_score, email_match = best_match(email_name)
    if github_username:
        git_score, git_match = best_match(github_username)
        if git_score > email_score:
            return git_match
    return email_match
Пример #2
0
def extract_github_url(sentences):
    """Find a GitHub profile mentioned in tagged sentences and check it exists.

    Candidate usernames are tried in this order; the first one whose profile
    URL answers with an OK status is returned:

    1. the last path segment of every token containing ``"github.com"``
       (trailing slashes are ignored);
    2. every ``"NNP"`` token that appears after the first ``"NNP"`` token
       containing "github" (case-insensitive).

    :param sentences: Iterable of tagged sentences; each sentence is an
        iterable of chunks read as ``chunk[0]`` (word) and ``chunk[1]`` (tag).
    :return: ``(username, github_url)`` for the first verified candidate, or
        ``None`` when no candidate could be verified.
    :raises requests.RequestException: if an HTTP request fails or times out.
    """
    base_url = "https://github.com/"
    names = []
    urls = []
    for tagged_sentence in sentences:
        for chunk in tagged_sentence:
            try:
                if "github.com" in chunk[0]:
                    urls.append(chunk[0])
                if chunk[1] == "NNP":
                    names.append(chunk[0])
            except (IndexError, KeyError, TypeError):
                # Chunk is too short or not indexable: not a (word, tag) pair.
                continue

    def verified(username):
        """Return ``(username, url)`` if the profile page exists, else ``None``."""
        if not username:
            # An empty username would resolve to the GitHub home page, which
            # always answers OK and is not a profile.
            return None
        github_url = base_url + username
        # Bounded wait so an unresponsive server cannot hang the caller.
        response = requests.get(github_url, timeout=10)
        if response.status_code == requests.codes.ok:
            return username, github_url
        return None

    for url in urls:
        # Strip trailing "/" so ".../user/" still yields "user".
        match = verified(url.rstrip('/').split('/')[-1])
        if match:
            return match

    # Fall back to proper nouns that follow a mention of "github".
    has_found = False
    for name in names:
        if has_found:
            match = verified(name)
            if match:
                return match
        elif "github" in name.lower():
            has_found = True

    return None
Пример #3
0
 def extractnames(self, sentences):
     """Return the PERSON entities found by ``nltk.ne_chunk`` in each sentence.

     :param sentences: Iterable of sentences, each passed unchanged to
         ``nltk.ne_chunk`` (presumably POS-tagged token lists).
     :return: List of names in order of appearance; each name is the first
         item of every child of a PERSON subtree joined by single spaces.
     """
     return [
         ' '.join(leaf[0] for leaf in subtree)
         for tagged in sentences
         for subtree in nltk.ne_chunk(tagged)
         # Exact-type check: only plain Tree nodes count as entities.
         if type(subtree) is nltk.tree.Tree and subtree.label() == 'PERSON'
     ]
Пример #4
0
    def getNames(self, dataFrame):
        """Return the ``"name"`` value of every row that passes ``isValidName``.

        :param dataFrame: Object whose ``iterrows()`` yields ``(index, row)``
            pairs, where ``row["name"]`` is the value to check.
        :return: List of accepted names, in row order.
        """
        return [
            row["name"]
            for _, row in dataFrame.iterrows()
            if self.isValidName(row["name"])
        ]
Пример #5
0
def get_names(text):
    """Return the distinct PERSON entity texts that ``nlp`` finds in ``text``.

    :param text: Input passed to the module-level ``nlp`` callable
        (presumably a spaCy pipeline -- confirm where ``nlp`` is defined).
    :return: List of unique entity strings; order is not defined because the
        values pass through a set.
    """
    people = {ent.text for ent in nlp(text).ents if ent.label_ == 'PERSON'}
    return list(people)
Пример #6
0
    def get_names(self, list_) -> list:
        """Return the ``name`` of every item accepted by ``Utils.isValidName``.

        :param list_: Iterable of objects exposing a ``name`` attribute.
        :return: List of accepted names, in input order.
        """
        validator = Utils()
        return [item.name for item in list_ if validator.isValidName(item.name)]
Пример #7
0
def get_names(data):
    """Return the PERSON named entities that NLTK finds in ``data``.

    The text is split into sentences, and each sentence is tokenised,
    POS-tagged and chunked with ``nltk.ne_chunk``.

    :param data: Raw text to analyse.
    :return: List of names in order of appearance; each name is the words of
        one PERSON subtree joined by single spaces.
    """
    names = []
    # The parameter is ``data``; the body previously read an undefined ``doc``.
    for sent in nltk.sent_tokenize(data):
        tagged = nltk.pos_tag(nltk.word_tokenize(sent))
        for chunk in nltk.ne_chunk(tagged):
            # Entity subtrees carry a label; plain (word, tag) tuples do not.
            if hasattr(chunk, 'label'):
                label = chunk.label()  # NLTK 3 API
            else:
                label = getattr(chunk, 'node', None)  # pre-3.0 attribute
            if label == 'PERSON':
                names.append(' '.join(c[0] for c in chunk.leaves()))
    return names
Пример #8
0
 def get_names(self, gender):
     """Load the name list for ``gender`` from a text file.

     ``"m"`` selects the male list; any other value selects the female list.
     The file is looked up in the working directory first and under
     ``tools/`` if it is not found there.

     :param gender: ``"m"`` for male names, anything else for female names.
     :return: List with one entry per line, stripped and title-cased.
     :raises FileNotFoundError: if neither location has the file.
     """
     if gender == "m":
         primary, fallback = "male_names.txt", "tools/male_names.txt"
     else:
         primary, fallback = "female_names.txt", "tools/female_names.txt"

     def load(filename):
         with open(filename, "r") as handle:
             return [entry.strip().title() for entry in handle]

     try:
         return load(primary)
     except FileNotFoundError:
         return load(fallback)
def find_names(text, tags):
    """
    Finds names with a Stanford tagger. These are assumed to be a speaker name if no speaker has been found.

    A name is a run of consecutive tokens where each token is tagged PERSON,
    is listed in NAMES, is (lower-cased) listed in TITLES, looks like an
    abbreviation such as "Dr.", or is punctuation. The first line of the text
    that, once stripped, equals one of those names is tagged as the speaker.

    :param text: The text to search.
    :param tags: The tags accumulated so far. Must support ``add`` (e.g. a set); it is updated in place.
    :return: The same ``tags`` object, with at most one ``(line, SPEAKER_TAG)`` entry added.
    """
    stanford_tagger = StanfordNERTagger(STANFORD_TAGGER_DICTIONARY,
                                        STANFORD_TAGGER_PATH,
                                        encoding='utf-8')
    tokenised = nltk.word_tokenize(text)
    classified = stanford_tagger.tag(tokenised)

    names = []
    current_name = ""
    found_name = False
    for token in classified:
        # Each classified token is a (string, tag) pair.
        token_word = token[0]
        token_tag = token[1]
        is_punctuation = token_word in string.punctuation
        # One or more word characters followed by a single dot, e.g. "Dr.".
        is_shortened_name = re.match(r"^\w+\.$", token_word) is not None
        is_name_part = (token_tag == "PERSON"
                        or token_word in NAMES
                        or token_word.lower() in TITLES
                        or is_shortened_name
                        or is_punctuation)
        if is_name_part:
            # Words after the first are space-separated; punctuation is
            # attached directly to what precedes it.
            if found_name and not is_punctuation:
                current_name += " " + token_word
            else:
                current_name += token_word
            found_name = True
        elif found_name:
            names.append(current_name)
            current_name = ""
            found_name = False

    # A name that runs up to the last token has no following token to close
    # it, so it must be flushed here or it would be lost.
    if found_name:
        names.append(current_name)

    for line in text.splitlines():
        if line.strip() in names:
            tags.add((line, SPEAKER_TAG))
            break

    return tags
Пример #10
0
# Interactive lookup: given a freedom fighter's name, list the other known
# names or the known locations that occur among the tagged proper nouns.
# Relies on ``tagged``, ``mynames`` and ``mylocations`` defined earlier.
# Single-argument print(...) calls behave the same on Python 2 and 3.

# Lower-cased words of every token tagged as a proper noun (NNP).
nouns = [la[0].lower() for la in tagged if la[1].lower() == 'nnp']

name = raw_input("Enter name of freedom fighter")
if name in mynames:
    print("Associated:  1.Names 2.Locations")
    ch = int(raw_input("Type 1 or 2"))
    if ch == 1:
        # Known names among the nouns, de-duplicated. Each noun is checked
        # against the known names (not against the queried name), and the
        # queried name itself is left out of the result.
        names = [found for found in set(noun for noun in nouns if noun in mynames)
                 if found != name]
        print("The names of freedom fighters associated with " + name + " are:")
        for i in names:
            print(i)
    elif ch == 2:
        # Known locations among the nouns, de-duplicated.
        locations = list(set(noun for noun in nouns if noun in mylocations))
        print("The location associated with " + name + " are:")
        for loc in locations:
            print(loc)
Пример #11
0
# Interactive lookup: given a freedom fighter's name, list the other known
# names or the known locations that occur among the tagged proper nouns.
# Relies on ``tagged``, ``mynames`` and ``mylocations`` defined earlier.
# Single-argument print(...) calls behave the same on Python 2 and 3.

# Lower-cased words of every token tagged as a proper noun (NNP).
nouns = [la[0].lower() for la in tagged if la[1].lower() == 'nnp']

name = raw_input("Enter name of freedom fighter")
if name in mynames:
    print("Associated:  1.Names 2.Locations")
    ch = int(raw_input("Type 1 or 2"))
    if ch == 1:
                # Known names among the nouns, de-duplicated. Each noun is
                # checked against the known names (not against the queried
                # name), and the queried name itself is left out.
                names = [found
                         for found in set(noun for noun in nouns if noun in mynames)
                         if found != name]
                print("The names of freedom fighters associated with " + name + " are:")
                for i in names:
                    print(i)
    elif ch == 2:
                # Known locations among the nouns, de-duplicated.
                locations = list(set(noun for noun in nouns if noun in mylocations))
                print("The location associated with " + name + " are:")