def _unique_in_order(items):
    """Return the elements of *items* without duplicates, first occurrence first."""
    unique = []
    for item in items:
        if item not in unique:
            unique.append(item)
    return unique


def analysis(Name, mydata, Familyflag, Suspect, Familynames, shift):
    """Build the list of multi-word full names derived from *Name* and *Suspect*.

    The article text is split into sentences.  Only sentences that contain no
    ``ihatecrime`` word, do not match ``action``, have ``senty(...)[1] > 0``
    and mention one of the last names of *Name* are sent to the NER tagger.
    Entities found there are either removed from *Name* (byline / contact
    names, or names followed by an ``options`` match) or collected as
    additional last names.

    Parameters:
        Name: list of candidate names (not mutated; rebound locally).
        mydata: full article text.
        Familyflag: the string ``"True"`` enables the family-name handling.
        Suspect: list of names concatenated with the collected names.
        Familynames: iterable of family-name strings; their whitespace-split
            tokens are looked up in ``familydict``.
        shift: key used to pick the entity group out of the tagger result.

    Returns:
        A list of full names (each with more than one word), longest first,
        where every kept name contributes at least one word not already
        seen in a longer kept name.
    """
    tagger = ner.SocketNER(host="localhost", port=8080)
    sentences = tokenize.sent_tokenize(mydata)
    names = initialze()
    Lname = lastname(Name)

    # Loop-invariant patterns, built once.
    crime_pattern = ".*?(%s).*?" % "|".join(ihatecrime)
    action_pattern = ".*?(%s).*?" % action
    lastname_pattern = "%s" % ("|".join(Lname))

    # Tagging out the sentences with the non criminal activities.
    for sentence in sentences:
        if (
            re.search(crime_pattern, sentence, re.IGNORECASE)
            or re.search(action_pattern, sentence)
            or not (senty(sentence)[1] > 0)
            or not re.search(lastname_pattern, sentence, re.IGNORECASE)
        ):
            continue
        try:
            for pnoun in tagger.get_entities(sentence)[shift]:
                # Entity text is literal, never a pattern: escape it so that
                # characters such as "." or "(" cannot alter or break the regex.
                literal = re.escape(pnoun)
                is_byline = sentence == sentences[0] and (
                    # "By <name>" / "by <name>" as a whole word; the former
                    # "[By|by]" was a character class matching a single letter.
                    re.search(r"\b[Bb]y (%s)" % literal, sentence)
                    or re.search("CONTACT: (%s)" % literal, sentence)
                )
                if is_byline or re.search(
                    "(%s).*?(%s)" % (literal, options), sentence
                ):
                    # Drop the entity from the candidates, keeping a stable order.
                    Name = _unique_in_order(n for n in Name if n != pnoun)
                elif pnoun in Name or pnoun.title() in Lname:
                    names.append(pnoun.title())
                elif pnoun in Lname:
                    names.append(pnoun)
                elif (
                    len(pnoun.split()) == 1
                    and Familyflag == "True"
                    and pnoun in " ".join(Familynames).split()
                ):
                    names.append(familydict[pnoun])
        except Exception:
            # Best effort: a tagger/lookup failure skips the rest of this
            # sentence only; results gathered so far are kept.
            continue

    Lastname = _unique_in_order(names + Suspect)
    Fullname = fulname(Name + Suspect, Lastname)
    Fullname.sort(key=len, reverse=True)

    # Keep multi-word names that bring at least one word not yet seen in a
    # longer name; single-word names are dropped.
    seen_words = set()
    kept = []
    for full in Fullname:
        parts = full.split()
        if len(parts) > 1 and not seen_words.issuperset(parts):
            seen_words.update(parts)
            kept.append(full)
    Fullname = kept

    Famname = initialze()
    # Identifying the family member involved in the crime
    if Familyflag == "True":
        # NOTE(review): `member not in Fullname` can never hold while iterating
        # over Fullname itself, so Famname always stays empty.  The list that
        # was meant to be scanned is not visible here; behaviour is left
        # unchanged until that is confirmed.
        for member in Fullname:
            if member.split()[-1] in Lastname and member not in Fullname:
                forward = mydata.split(member)[1].split(". ")[0]
                backward = mydata.split(member)[0].split(". ")[-1]
                if senty(forward)[1] != 0 or senty(backward)[1] != 0:
                    Famname.append(member)
    Fullname = Fullname + Famname
    return Fullname
def findname(Fullname, mydata, crimetitle, words, Lname):
    """Pair names with the nearest title in *mydata* and strip those spans.

    For every name the text between the name and each title in *crimetitle*
    is examined in both directions ("forward": name ... title, "backward":
    title ... name).  The shortest span whose ``senty(...)[1]`` is 0, that
    does not match ``wrongtitlepattern`` and that contains no token found in
    *Lname* wins.  The winning ``name + span + title`` (or
    ``title + span + name``) text is then removed from the article.

    Parameters:
        Fullname: iterable of full-name strings to look for.
        mydata: article text.
        crimetitle: iterable of title strings, used as literal separators.
        words: unused; kept so existing callers keep working.
        Lname: container of last names that must not occur inside a span.

    Returns:
        A tuple ``(ctitle, gname, mydata)``: the matched titles, the names
        they were matched to (same order), and the article text with the
        matched spans removed.
    """
    ctitle = initialze()
    gname = initialze()
    for name in Fullname:
        data = mydata
        title = None
        title_follows_name = False
        for candidate in crimetitle:
            try:
                # Identifies the Name of the person in government position or
                # proved as innocent in the crime.
                # Text following the name up to the title.
                forward = mydata.split(name)[1].split(candidate)[0]
                # Text preceding the name back to the title.
                backward = mydata.split(name)[0].split(candidate)[-1]
                if backward.split():
                    # Forward gets priority when backward reaches the first
                    # word of the article, or on a length tie when forward
                    # starts with a comma.
                    if mydata.split()[0] == backward.split()[0] or (
                        len(forward) == len(backward) and forward[0] == ","
                    ):
                        backward = mydata
                    # Backward gets priority when forward reaches the last
                    # word of the article (an empty forward is left alone).
                    forward_words = forward.split()
                    if forward_words and mydata.split()[-1] == forward_words[-1]:
                        forward = mydata
                elif mydata.split()[0] == name.split()[0]:
                    backward = mydata

                # Using sentiment analysis to identify the negativity of the text.
                if (
                    len(forward) < len(data)
                    and len(forward) < len(backward)
                    and senty(forward)[1] == 0
                    and not re.search("%s" % wrongtitlepattern, forward)
                ):
                    tokens = re.findall(r"[\w']+|[.,!?; ]", forward)
                    # Eliminating the possibility of a criminal name getting
                    # added to a government title.
                    if not any(token in Lname for token in tokens):
                        data = forward
                        title = candidate
                        title_follows_name = True
                elif (
                    len(backward) < len(data)
                    and len(forward) > len(backward)
                    and senty(backward)[1] == 0
                    and not re.search("%s" % wrongtitlepattern, backward)
                ):
                    tokens = re.findall(r"[\w']+|[.,!?; ]", backward)
                    if not any(token in Lname for token in tokens):
                        data = backward
                        title = candidate
                        title_follows_name = False
            except Exception:
                # Best effort: a name or title missing from the text (or a
                # helper failure) just means this title is not a match.
                continue

        if len(mydata) == len(data):
            # No span was selected for this name.
            continue
        gname.append(name)
        ctitle.append(title)
        # The direction is the one recorded for the winning title, not the
        # one left over from the last title tried.  The span is literal text,
        # so it is removed with str.replace rather than used as a regex.
        if title_follows_name:
            mydata = mydata.replace(name + data + title, "")
        else:
            mydata = mydata.replace(title + data + name, "")
    return (ctitle, gname, mydata)