from fuzzywuzzy import fuzz


def extract_names(sentences, email, github_username):
    """Return the NNP token that best matches the email local part or GitHub username."""
    names = []
    email_name = email.split('@')[0]
    for tagged_sentence in sentences:
        for chunk in tagged_sentence:
            try:
                if chunk[1] == "NNP":
                    names.append(chunk[0])
            except (IndexError, TypeError):
                pass  # skip malformed chunks; a bare except would hide real errors
    best_match = None      # [name, ratio] most similar to the email local part
    git_best_match = None  # [name, ratio] most similar to the GitHub username
    for name in names:
        ratio = fuzz.ratio(email_name.lower(), name.lower())
        if best_match is None or ratio > best_match[1]:
            best_match = [name, ratio]
        if github_username:
            git_ratio = fuzz.ratio(github_username.lower(), name.lower())
            if git_best_match is None or git_ratio > git_best_match[1]:
                git_best_match = [name, git_ratio]
    if best_match is None:
        return None  # no proper nouns found at all
    if git_best_match and git_best_match[1] > best_match[1]:
        return git_best_match[0]
    return best_match[0]
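# Usage sketch (not from the original source): `sentences` is assumed to be a list of
# NLTK POS-tagged sentences; the text, email, and username below are made-up examples.
import nltk

nltk.download("punkt", quiet=True)
nltk.download("averaged_perceptron_tagger", quiet=True)

text = "Jane Doe pushed the fix yesterday."
sentences = [nltk.pos_tag(nltk.word_tokenize(s)) for s in nltk.sent_tokenize(text)]
print(extract_names(sentences, "jdoe@example.com", "janedoe"))  # e.g. 'Doe'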
import requests


def extract_github_url(sentences):
    """Return (username, profile_url) for the first GitHub profile that resolves."""
    base_url = "https://github.com/"
    names = []
    urls = []
    for tagged_sentence in sentences:
        for chunk in tagged_sentence:
            try:
                if "github.com" in chunk[0]:
                    urls.append(chunk[0])
                if chunk[1] == "NNP":
                    names.append(chunk[0])
            except (IndexError, TypeError):
                pass  # skip malformed chunks
    # First try explicit URLs: the last path segment is taken as the username.
    for url in urls:
        username = url.split('/')[-1]
        github_url = base_url + username
        req = requests.get(github_url)
        if req.status_code == requests.codes.ok:
            return username, github_url
    # Otherwise try the proper noun that follows a mention of "github".
    has_found = False
    for name in names:
        if has_found:
            github_url = base_url + name
            req = requests.get(github_url)
            if req.status_code == requests.codes.ok:
                return name, github_url
        elif "github" in name.lower():
            has_found = True
    return None, None
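# Usage sketch (illustrative): takes the same POS-tagged input as extract_names.
# Note that every candidate triggers a live HTTP request to github.com.
tagged = [[("See", "VB"), ("github.com/torvalds", "NN")]]
username, url = extract_github_url(tagged)  # e.g. ('torvalds', 'https://github.com/torvalds')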
import nltk


def extractnames(self, sentences):
    """Collect PERSON entities from already POS-tagged sentences."""
    names = []
    for tagged_sentence in sentences:
        for chunk in nltk.ne_chunk(tagged_sentence):
            # Named-entity chunks come back as Tree nodes; plain tokens stay tuples.
            if isinstance(chunk, nltk.tree.Tree) and chunk.label() == 'PERSON':
                names.append(' '.join(c[0] for c in chunk))
    return names
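# Usage sketch (not from the original source): the packages below are the standard
# NLTK downloads needed by ne_chunk; `self` is unused, so the method runs standalone.
import nltk

for pkg in ("punkt", "averaged_perceptron_tagger", "maxent_ne_chunker", "words"):
    nltk.download(pkg, quiet=True)

text = "Ada Lovelace worked with Charles Babbage."
sentences = [nltk.pos_tag(nltk.word_tokenize(s)) for s in nltk.sent_tokenize(text)]
print(extractnames(None, sentences))  # e.g. ['Ada Lovelace', 'Charles Babbage']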
def getNames(self, dataFrame):
    names = []
    for _, splitchunk in dataFrame.iterrows():  # the row index is unused
        if not self.isValidName(splitchunk["name"]):
            continue
        names.append(splitchunk["name"])
    return names
def get_names(text):
    # Assumes a module-level spaCy pipeline, e.g. nlp = spacy.load("en_core_web_sm").
    doc = nlp(text)
    names = [ent.text for ent in doc.ents if ent.label_ == 'PERSON']
    return list(set(names))  # deduplicate; note that this drops the original order
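# Usage sketch (illustrative), assuming the small English model is installed
# via `python -m spacy download en_core_web_sm`.
import spacy

nlp = spacy.load("en_core_web_sm")
print(get_names("Marie Curie met Albert Einstein in Brussels."))
# e.g. ['Marie Curie', 'Albert Einstein'] -- order varies because of the set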
def get_names(self, list_) -> list:
    names = []
    utils = Utils()  # Utils.isValidName is defined elsewhere in the project
    for splitchunk in list_:
        if not utils.isValidName(splitchunk.name):
            continue
        names.append(splitchunk.name)
    return names
import nltk


def get_names(data):
    names = []
    for sent in nltk.sent_tokenize(data):  # was `doc`, which is undefined here
        for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sent))):
            # Modern NLTK exposes the chunk type via label(); the old .node attribute is gone.
            if hasattr(chunk, 'label') and chunk.label() == 'PERSON':
                names.append(' '.join(c[0] for c in chunk.leaves()))
    return names
def get_names(self, gender):
    if gender == "m":
        path = "male_names.txt"
        alternate_path = "tools/male_names.txt"
    else:
        path = "female_names.txt"
        alternate_path = "tools/female_names.txt"
    names = []
    try:
        with open(path, "r") as f:
            for name in f:
                names.append(name.strip().title())
    except FileNotFoundError:
        # Fall back to the alternate location.
        with open(alternate_path, "r") as f:
            for name in f:
                names.append(name.strip().title())
    return names
import re
import string

import nltk
from nltk.tag import StanfordNERTagger


def find_names(text, tags):
    """
    Finds names with a Stanford tagger. These are assumed to be a speaker name
    if no speaker has been found.

    :param text: The text to search.
    :param tags: A set of accumulated (line, tag) pairs tagged so far.
    :return: The updated set of accumulated tags.
    """
    stanford_tagger = StanfordNERTagger(STANFORD_TAGGER_DICTIONARY,
                                        STANFORD_TAGGER_PATH,
                                        encoding='utf-8')
    tokenised = nltk.word_tokenize(text)
    classified = stanford_tagger.tag(tokenised)
    names = []
    current_name = ""
    found_name = False
    for token_word, token_tag in classified:  # each token is a (string, tag) pair
        # Raw string so the regex matches abbreviated names such as "J.".
        is_shortened_name = re.match(r"^\w+\.$", token_word)
        if (token_tag == "PERSON" or token_word in NAMES
                or token_word.lower() in TITLES
                or is_shortened_name is not None
                or token_word in string.punctuation):
            if found_name and token_word not in string.punctuation:
                current_name += " " + token_word
            else:
                current_name += token_word
            found_name = True
        elif found_name:
            # This branch is only reachable when token_tag != "PERSON",
            # so the name being accumulated has ended.
            names.append(current_name)
            current_name = ""
            found_name = False
    for line in text.splitlines():
        if line.strip() in names:
            tags.add((line, SPEAKER_TAG))
            break
    return tags
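# The module-level constants used above (STANFORD_TAGGER_DICTIONARY, STANFORD_TAGGER_PATH,
# NAMES, TITLES, SPEAKER_TAG) are defined elsewhere in the original project. A hypothetical
# setup might look like the following; the paths and word lists are placeholders only, and
# StanfordNERTagger additionally requires a Java runtime and the Stanford NER jar.
STANFORD_TAGGER_DICTIONARY = "stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz"
STANFORD_TAGGER_PATH = "stanford-ner/stanford-ner.jar"
NAMES = {"Alice", "Bob"}                    # known first names (assumed)
TITLES = {"dr", "prof", "mr", "mrs", "ms"}  # lowercase honorifics (assumed)
SPEAKER_TAG = "speaker"                     # tag label (assumed)

tags = find_names("Dr. Jane Smith\nOpening keynote at 9am.", set())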
nouns = []
for la in tagged:
    if la[1].lower() == 'nnp':
        nouns.append(la[0].lower())
name = input("Enter name of freedom fighter: ")
if name in mynames:
    print("Associated: 1.Names 2.Locations")
    ch = int(input("Type 1 or 2: "))
    if ch == 1:
        names = []
        for noun in nouns:
            for name1 in mynames:
                if noun == name1:  # was `noun == name`, which never matched other fighters
                    names.append(noun)
        names = list(set(names))
        if name in names:
            names.remove(name)  # list.remove() returns None, so don't reassign the result
        print("The names of freedom fighters associated with " + name + " are:")
        for i in names:
            print(i)
    elif ch == 2:
        locations = []
        for noun in nouns:
            for loc in mylocations:
                if noun == loc:
                    locations.append(noun)
        locations = list(set(locations))
        print("The locations associated with " + name + " are:")
        for i in locations:
            print(i)