Пример #1
0
# Similarity score (percent) a line must exceed to count as matched.
# The original author chose 85 based on previously observed results.
MATCH_THRESHOLD = 85

maxurl = None  # best-matching URL; only read after a match in the same pass

# For every non-blank input line, compare it against up to three candidate
# URLs and record the best match when it exceeds MATCH_THRESHOLD.
# Relies on names defined outside this fragment: content, linecount,
# plagper, matched, highlight, getURL3, webSearch and comparefuzzy.
for contentline in content:
    if contentline == "\n":  # skip blank separator lines
        continue

    linecount += 1

    # NOTE(review): assumed to return a sliceable sequence of URL strings
    # with None marking missing entries -- confirm against getURL3.
    URLsToCheck = getURL3.URLFinder(contentline)

    maxval = 0  # best similarity score seen for this line

    # Slice rather than index 0..2 so that a result with fewer than three
    # entries no longer raises IndexError.
    for candidate in URLsToCheck[:3]:
        if candidate is None:
            continue

        webSearch.searchResults(candidate)
        # NOTE(review): check() only receives the line; presumably it
        # compares against what searchResults() just fetched -- confirm.
        tempval = comparefuzzy.check(contentline)
        if tempval > maxval:
            maxval = tempval
            maxurl = candidate
        if maxval > MATCH_THRESHOLD:
            break  # good enough; skip the remaining candidates

    if maxval > MATCH_THRESHOLD:
        plagper += 100  # a matched line contributes a full 100 to the total

        # Record the source of the match, then the line itself in upper
        # case together with its source.
        matched.write("Line-" + str(linecount) + "::" + maxurl + "\n")
        highlight.write(contentline.upper() + "Source::{" + maxurl + "}\n")
Пример #2
0
# Similarity score (percent) a line must exceed to count as matched.
# The original author chose 85 based on previously observed results.
MATCH_THRESHOLD = 85

linecount = 0  # number of non-blank lines processed
plagper = 0  # running sum of per-line scores (100 for each matched line)

# For every non-blank input line, look up one candidate URL, compare the
# line against it, and write the outcome to the matched/highlight outputs.
# Relies on names defined outside this fragment: content, matched,
# highlight, getURL1, webSearch and comparefuzzy.
for contentline in content:
    if contentline == "\n":  # skip blank separator lines
        continue

    linecount += 1

    URLsToCheck = getURL1.URLFinder(contentline)
    somevar = 0  # similarity score for this line; stays 0 when no URL

    if URLsToCheck is None:
        # NOTE(review): a line with no URL is not written to highlight at
        # all, so it is missing from that output -- confirm this is intended.
        continue

    webSearch.searchResults(URLsToCheck)
    # NOTE(review): check() only receives the line; presumably it compares
    # against what searchResults() just fetched -- confirm.
    somevar = comparefuzzy.check(contentline)

    if somevar > MATCH_THRESHOLD:
        plagper += 100  # a matched line contributes a full 100 to the total

        # Record the source of the match, then the line itself in upper
        # case together with its source.
        matched.write("Line-" + str(linecount) + "::" + URLsToCheck + "\n")
        highlight.write(contentline.upper() + "Source::{" + URLsToCheck +
                        "}\n")
    else:
        plagper += somevar  # below threshold: add the partial score
        highlight.write(contentline)  # unmatched line is written unchanged