# Multi-URL pass: for each content line, try up to 3 candidate source URLs and
# keep the one with the best fuzzy-match (Levenshtein) score.
# NOTE(review): relies on `content`, `linecount`, `plagper`, `getURL3`,
# `webSearch`, `comparefuzzy`, `matched`, `highlight` being defined earlier in
# the file — confirm against the full script.
maxurl = None  # URL with the highest similarity score seen for the current line
for contentline in content:  # going through each line in the user data
    if contentline != "\n":  # get rid of unrequired lines read
        linecount += 1
        # Up to 3 candidate URLs extracted from the current line.
        # (The original pre-allocated [None] * 3 here; that value was dead —
        # it was immediately overwritten by URLFinder's return.)
        URLsToCheck = getURL3.URLFinder(contentline)
        maxval = 0  # best similarity score among the candidates for this line
        for j in range(3):  # for all 3 candidate URLs
            if URLsToCheck[j] is not None:
                webSearch.searchResults(URLsToCheck[j])  # scrape the candidate page
                # Compare the line against the scraped data (Levenshtein ratio).
                tempval = comparefuzzy.check(contentline)
                if tempval > maxval:
                    maxval = tempval
                    maxurl = URLsToCheck[j]
                if maxval > 85:  # already past the plagiarism threshold — stop early
                    break
        if maxval > 85:  # 85%: threshold value for Levenshtein distance
            plagper += 100  # threshold of 85% chosen from observed algo performance
            matched.write("Line-" + str(linecount) + "::" + maxurl + "\n")  # writing for matched sources
            # highlight.write("<font color=\"red\"><b>"+contentline+"</b></font>\n") #writing for highlighting
            highlight.write(contentline.upper() + "Source::{" + maxurl + "}\n")
        else:
            # Consistency fix: mirror the single-URL variant — accumulate the
            # partial score and keep the line (non-highlighted) in the report
            # instead of silently dropping below-threshold lines.
            plagper += maxval
            highlight.write(contentline)  # writing non-highlighted
# Single-URL pass: for each content line, find one candidate source URL,
# scrape it, and score the line against the scraped text.
# NOTE(review): relies on `content`, `getURL1`, `webSearch`, `comparefuzzy`,
# `matched`, `highlight` being defined earlier in the file — confirm.
linecount = 0  # keeps a line count
plagper = 0  # accumulates the plagiarism percentage
for contentline in content:  # going through each line in the user data
    if contentline != "\n":  # get rid of unrequired lines read
        linecount += 1  # keep a count of content lines
        # Candidate URL for this line. (The original's `URLsToCheck = None`
        # initializer was dead — immediately overwritten by URLFinder.)
        URLsToCheck = getURL1.URLFinder(contentline)  # finds the url
        somevar = 0  # similarity score; stays 0 when no URL was found
        if URLsToCheck is not None:
            webSearch.searchResults(URLsToCheck)  # scrapes from the url
            somevar = comparefuzzy.check(contentline)  # compares with the scraped data
        # Threshold check sits outside the URL branch: the somevar = 0 flag
        # above exists precisely so URL-less lines fall into the else path.
        if somevar > 85:
            plagper += 100  # threshold of 85% chosen from observed algo performance (Levenshtein distance)
            matched.write("Line-" + str(linecount) + "::" + URLsToCheck + "\n")  # writing for matched sources
            # highlight.write("<font color=\"red\"><b>"+contentline+"</b></font>\n") #writing for highlighting
            highlight.write(contentline.upper() + "Source::{" + URLsToCheck + "}\n")
        else:
            plagper += somevar  # below threshold: count only the partial score
            highlight.write(contentline)  # writing non-highlighted