Пример #1
0
def getPlagiarismGroups(self, results):
    """Return a list of lists, each holding the results of one plagiarism group.

    The groups correspond one-to-one (same order) to the clusters returned by
    getClusters(results, onlyPositives=False, onlyNonZeroSimilarities=True).

    results -- iterable of result objects; each must provide getIdentifier()
               returning a pair of ids.

    A result is put into the group of the first cluster that contains either
    of its two ids. A result whose ids appear in no cluster is left out.
    """
    # one (initially empty) group per cluster
    clusters = getClusters(results, onlyPositives=False, onlyNonZeroSimilarities=True)
    groups = [[] for _ in clusters]

    for result in results:
        ids = result.getIdentifier()
        # find the cluster this result belongs to; only the first match counts
        for cluster, group in zip(clusters, groups):
            if ids[0] in cluster or ids[1] in cluster:
                group.append(result)
                break
    return groups
Пример #2
0
def createClusterHeatmap(resultList, recSize=20, onlyPositiveResults=True):
    """Create a heatmap chart showing the clusters found in the given results.

    resultList          -- the results to chart
    recSize             -- passed through to createHeatmapChart as ``recSize``
    onlyPositiveResults -- if true, only the results returned by
                           getPositiveResults(resultList) are charted;
                           otherwise resultList is used as is.

    Returns the image produced by createHeatmapChart, or None when there are
    no results to chart.
    """
    #===create cluster matrix===
    if onlyPositiveResults:
        results = getPositiveResults(resultList)
    else:
        results = resultList

    if not results:
        return None

    # extract ids and clusters
    ids = getIdentifier(results)
    ids.sort()
    clusters = getClusters(results)
    # debug output; the parenthesised form prints the same text on Python 2 and 3
    print(str(clusters))

    # matrix[y][x] holds the cluster number for the id pair (ids[x], ids[y]);
    # the diagonal (an id paired with itself) is left empty.
    matrix = []
    for y, yId in enumerate(ids):
        row = []
        for x, xId in enumerate(ids):
            if x != y:
                row.append(getClusterNr(xId, yId, clusters))
            else:
                row.append(None)
        matrix.append(row)

    #===init===
    # Load the font exactly once, at function level (not inside the matrix loop).
    font = ImageFont.load_default()
    scopes = [i + 0.5 for i in range(len(clusters) + 1)]
    print(scopes)

    # create heatmap img
    # NOTE: no legend is added; the chart is returned as createHeatmapChart built it.
    return createHeatmapChart(matrix,
                              ids,
                              ids,
                              scopes=scopes,
                              recSize=recSize,
                              font=font)
Пример #3
0
def resultsToTorc(resultList, colored=False):
    """Return an image showing a Torc of the similarity relations in the results.

    A Torc is a kind of overview which allows the user to recognize the
    similarity relations between different texts. All texts are arranged on a
    circle, and a connecting line is drawn between the two texts of every
    result for which isSuspectPlagiarism() is true.

    resultList -- list of PlagResult objects
    colored    -- if true, each line gets the color that getColorForScope
                  returns for the cluster number (getClusterNr) of its two
                  ids; otherwise every line is black.

    Returns the image, or None if resultList is empty.
    Raises NoValidArgumentError if resultList is not a list or contains an
    element that is not a PlagResult.
    """
    # check preconditions
    if not isinstance(resultList, list):
        raise NoValidArgumentError('Input must be of type list')
    if not resultList:
        return None
    for result in resultList:
        if not isinstance(result, PlagResult):
            raise NoValidArgumentError('Input list should only contain values of type PlagResult.')

    #1. get all identifiers of the results (unique, sorted)
    idList = sorted(set(ident
                        for result in resultList
                        for ident in result.getIdentifier()))

    #2. create a circle with a size depending on the number of identifiers
    font = ImageFont.load_default()
    freespace = computeMaxIdLength(idList, font)
    margin = 10
    radius = computeRadius(len(idList))  # radius depends on the number of ids
    xM = freespace + radius + margin  # middle x pos of circle
    yM = freespace + radius + margin  # middle y pos of circle
    img = Image.new('RGB', (2 * xM, 2 * yM), (255, 255, 255))
    draw = ImageDraw.Draw(img)
    draw.arc((freespace + margin,
              freespace + margin,
              freespace + margin + (2 * radius),
              freespace + margin + (2 * radius)),
             0,
             360,
             fill=(150, 150, 150))

    #3. arrange the ids along the circle and save the coordinates for each id
    # The angle is computed per id with float division so that the ids are
    # spread evenly over the full circle for any count. An integer step of
    # 360 / count would be 0 for more than 360 ids and uneven whenever the
    # count does not divide 360.
    idCount = len(idList)
    idPosDict = {}
    for idNr, ident in enumerate(idList):
        # x = xM + r * cos(phi) and y = yM + r * sin(phi)
        phi = radians(360.0 * idNr / idCount)
        idPosDict[ident] = (xM + (radius * cos(phi)),
                            yM + (radius * sin(phi)))

    # draw the id names next to their position on the circle
    for ident in idList:
        label = str(ident)
        draw.text(computeFontPos(font, draw, label, idPosDict[ident], xM, yM),
                  label,
                  font=font,
                  fill=(0, 0, 0))

    #4. walk through the results and plot the similarity relations as lines between the ids
    clusters = None
    scopes = None
    if colored:
        #TODO: pass these parameters in from the caller?
        clusters = getClusters(resultList,
                               onlyPositives=False,
                               onlyNonZeroSimilarities=False)
        scopes = list(range(len(clusters)))

    for result in resultList:
        if not result.isSuspectPlagiarism():
            continue
        ids = result.getIdentifier()
        if colored:
            color = getColorForScope(getClusterNr(ids[0], ids[1], clusters),
                                     scopes)
        else:
            color = (0, 0, 0)
        draw.line([idPosDict.get(ids[0]), idPosDict.get(ids[1])], fill=color)

    #5. return the image
    return img
Пример #4
0
def resultsToTorc(resultList, colored=False):
    """Return an image showing a Torc of the similarity relations in the results.

    A Torc is a kind of overview which allows the user to recognize the
    similarity relations between different texts. All texts are arranged on a
    circle, and a connecting line is drawn between the two texts of every
    result for which isSuspectPlagiarism() is true.

    resultList -- list of PlagResult objects
    colored    -- if true, each line gets the color that getColorForScope
                  returns for the cluster number (getClusterNr) of its two
                  ids; otherwise every line is black.

    Returns the image, or None if resultList is empty.
    Raises NoValidArgumentError if resultList is not a list or contains an
    element that is not a PlagResult.
    """
    # check preconditions
    if not isinstance(resultList, list):
        raise NoValidArgumentError('Input must be of type list')
    if not resultList:
        return None
    for result in resultList:
        if not isinstance(result, PlagResult):
            raise NoValidArgumentError('Input list should only contain values of type PlagResult.')

    #1. get all identifiers of the results (unique, sorted)
    idList = sorted(set(ident
                        for result in resultList
                        for ident in result.getIdentifier()))

    #2. create a circle with a size depending on the number of identifiers
    font = ImageFont.load_default()
    freespace = computeMaxIdLength(idList, font)
    margin = 10
    radius = computeRadius(len(idList))  # radius depends on the number of ids
    xM = freespace + radius + margin  # middle x pos of circle
    yM = freespace + radius + margin  # middle y pos of circle
    img = Image.new('RGB', (2 * xM, 2 * yM), (255, 255, 255))
    draw = ImageDraw.Draw(img)
    draw.arc((freespace + margin,
              freespace + margin,
              freespace + margin + (2 * radius),
              freespace + margin + (2 * radius)),
             0,
             360,
             fill=(150, 150, 150))

    #3. arrange the ids along the circle and save the coordinates for each id
    # The angle is computed per id with float division so that the ids are
    # spread evenly over the full circle for any count. An integer step of
    # 360 / count would be 0 for more than 360 ids and uneven whenever the
    # count does not divide 360.
    idCount = len(idList)
    idPosDict = {}
    for idNr, ident in enumerate(idList):
        # x = xM + r * cos(phi) and y = yM + r * sin(phi)
        phi = radians(360.0 * idNr / idCount)
        idPosDict[ident] = (xM + (radius * cos(phi)),
                            yM + (radius * sin(phi)))

    # draw the id names next to their position on the circle
    for ident in idList:
        label = str(ident)
        draw.text(computeFontPos(font, draw, label, idPosDict[ident], xM, yM),
                  label,
                  font=font,
                  fill=(0, 0, 0))

    #4. walk through the results and plot the similarity relations as lines between the ids
    clusters = None
    scopes = None
    if colored:
        #TODO: pass these parameters in from the caller?
        clusters = getClusters(resultList,
                               onlyPositives=False,
                               onlyNonZeroSimilarities=False)
        scopes = list(range(len(clusters)))

    for result in resultList:
        if not result.isSuspectPlagiarism():
            continue
        ids = result.getIdentifier()
        if colored:
            color = getColorForScope(getClusterNr(ids[0], ids[1], clusters),
                                     scopes)
        else:
            color = (0, 0, 0)
        draw.line([idPosDict.get(ids[0]), idPosDict.get(ids[1])], fill=color)

    #5. return the image
    return img