Пример #1
0
def analysis(Name, mydata, Familyflag, Suspect, Familynames, shift):
    """Build the list of multi-word names associated with the article text.

    Sentences of ``mydata`` that contain none of the ``ihatecrime`` / ``action``
    patterns, score ``senty(sentence)[1] > 0`` and mention one of the last
    names derived from ``Name`` are run through the NER tagger.  The tagged
    entities are then used to drop by-line/contact names from ``Name`` and to
    collect matching names, which are finally expanded through ``fulname``.

    Args:
        Name: list of candidate names (combined with ``Suspect`` by list
            concatenation and reduced with set operations).
        mydata: article text; split into sentences with
            ``tokenize.sent_tokenize``.
        Familyflag: the string ``"True"`` enables the family-name handling;
            any other value disables it.
        Suspect: list of names that are always included.
        Familynames: iterable of strings whose space-separated words are
            treated as known family names.
        shift: key used to select the entity group from the result of
            ``tagger.get_entities`` (presumably the entity label, e.g. a
            person tag -- confirm against the caller).

    Returns:
        A list of names that consist of more than one word.  Its order is not
        stable because the list is rebuilt from a ``set``.
    """
    tagger = ner.SocketNER(host="localhost", port=8080)
    sentences = tokenize.sent_tokenize(mydata)
    names = initialze()
    Lname = lastname(Name)

    # Inspect only the sentences without criminal-activity wording.
    for sentence in sentences:
        if (
            re.search(".*?(%s).*?" % "|".join(ihatecrime), sentence, re.IGNORECASE)
            or re.search(".*?(%s).*?" % action, sentence)
            or not (senty(sentence)[1] > 0)
            or not re.search("%s" % ("|".join(Lname)), sentence, re.IGNORECASE)
        ):
            continue

        try:
            entities = tagger.get_entities(sentence)[shift]
            for pnoun in entities:
                # Entity text is data, not a pattern: escape it so characters
                # such as "." or "(" cannot alter or break the regex.
                escaped = re.escape(pnoun)
                # "(?:By|by)" is an alternation; the former "[By|by]" was a
                # character class that matched any single one of B, y, |, b.
                is_byline = sentence == sentences[0] and (
                    re.search(r"\b(?:By|by) (%s)" % escaped, sentence)
                    or re.search("CONTACT: (%s)" % escaped, sentence)
                )
                if is_byline or re.search("(%s).*?(%s)" % (escaped, options), sentence):
                    # Author/contact/quoted source: not a candidate any more.
                    Name = list(set(Name) - {pnoun})
                elif pnoun in Name or pnoun.title() in Lname:
                    names.append(pnoun.title())
                elif pnoun in Lname:
                    # ``pnoun in Name`` is already covered by the branch above.
                    names.append(pnoun)
                elif (
                    len(pnoun.split()) == 1
                    and Familyflag == "True"
                    and pnoun in " ".join(Familynames).split()
                ):
                    names.append(familydict[pnoun])
        except Exception:
            # Best effort: a tagger failure or a missing key skips the rest of
            # this sentence instead of aborting the whole analysis.
            continue

    Lastname = list(set(names + Suspect))
    Fullname = fulname(Name + Suspect, Lastname)
    Fullname.sort(key=len, reverse=True)

    # Keep a multi-word name only if it contributes a word that no longer
    # (earlier) name has already contributed.
    seen_words = set()
    kept = []
    for full in Fullname:
        parts = full.split()
        if len(parts) <= 1:
            continue
        for word in parts:
            if word not in seen_words:
                seen_words.add(word)
                kept.append(full)
    Fullname = list(set(kept))

    Famname = initialze()
    # Identifying the family member involved in the crime.
    if Familyflag == "True":
        for full in Fullname:
            # NOTE(review): ``full not in Fullname`` can never be true while
            # iterating ``Fullname`` itself, so this branch is unreachable and
            # ``Famname`` stays empty.  The intended collection is not visible
            # here -- confirm before changing the behaviour.
            if full.split()[-1] in Lastname and full not in Fullname:
                forward = mydata.split(full)[1].split(". ")[0]
                backward = mydata.split(full)[0].split(". ")[-1]
                if senty(forward)[1] != 0 or senty(backward)[1] != 0:
                    Famname.append(full)
        Fullname = Fullname + Famname
    return Fullname
Пример #2
0
def findname(Fullname, mydata, crimetitle, words, Lname):
    """Pair names with the closest title and strip the matched span from the text.

    For every name in ``Fullname`` and every title in ``crimetitle`` two spans
    are examined: the text between the name and the following title
    ("forward") and the text between the preceding title and the name
    ("backward").  The shorter of the two becomes a candidate when
    ``senty(span)[1] == 0``, it does not match ``wrongtitlepattern`` and none
    of its tokens is in ``Lname``.  The shortest accepted candidate over all
    titles wins; the name, the title and the span between them are then
    removed from the working text before the next name is processed.

    Args:
        Fullname: iterable of name strings to look up.
        mydata: text to search; a reduced copy is returned.
        crimetitle: iterable of title strings (used as literal separators).
        words: ignored; kept only so existing callers keep working.
        Lname: collection of last names that must not occur inside a span.

    Returns:
        Tuple ``(ctitle, gname, mydata)``: the matched titles, the names they
        were matched to (same order), and the text with matched spans removed.
    """
    ctitle = initialze()
    gname = initialze()
    for i in Fullname:
        data = mydata
        title = None
        name_first = False
        for j in crimetitle:
            try:
                # Text following the name up to the title.
                forward = mydata.split(i)[1].split(j)[0]
                # Text preceding the name back to the title.
                backward = mydata.split(i)[0].split(j)[-1]
                if backward.split():
                    # Discard the backward span when it starts at the first
                    # word of the text, or when both spans are equally long
                    # and the forward one starts with a comma.
                    if mydata.split()[0] == backward.split()[0] or (
                        len(forward) == len(backward) and forward[0] == ","
                    ):
                        backward = mydata
                    # Discard the forward span when it runs to the last word
                    # of the text (no title followed the name).
                    forward_tokens = forward.split()
                    if forward_tokens and mydata.split()[-1] == forward_tokens[-1]:
                        forward = mydata
                elif mydata.split()[0] == i.split()[0]:
                    backward = mydata

                # Only the strictly shorter span can be selected.
                if len(forward) < len(backward):
                    candidate, candidate_name_first = forward, True
                elif len(forward) > len(backward):
                    candidate, candidate_name_first = backward, False
                else:
                    continue

                if (
                    len(candidate) < len(data)
                    and senty(candidate)[1] == 0
                    and re.findall("%s" % wrongtitlepattern, candidate) == []
                ):
                    tokens = re.findall(r"[\w']+|[.,!?; ]", candidate)
                    # A last name inside the span means the title would be
                    # attached across another person's name: reject it.
                    if not any(token in Lname for token in tokens):
                        data = candidate
                        title = j
                        # Remember the direction of the span that was actually
                        # chosen; relying on the spans of the last title tried
                        # could target the wrong "name/title" order.
                        name_first = candidate_name_first
            except Exception:
                # Name or title not usable on this text (e.g. name absent,
                # empty separator): try the next title.
                continue

        if title is None:
            continue
        gname.append(i)
        ctitle.append(title)
        matched = i + data + title if name_first else title + data + i
        # Name and title are literal text, so remove the span literally
        # rather than interpreting it as a regular expression.
        mydata = mydata.replace(matched, "")
    return (ctitle, gname, mydata)