Exemplo n.º 1
0
 def __init__(self):
     """
         Runs the ASearchResultConstraint initializer and then creates the 
         two helper objects this instance keeps for later use.
     """
     # explicit base-class call; the base initializer is not visible in this excerpt
     ASearchResultConstraint.__init__(self)
     
     # double-underscore attributes are name-mangled by Python when this method
     # sits inside a class body, which keeps them private to that class.
     # NOTE(review): presumably used to turn entities into proxies and to resolve
     # them against each other -- confirm against the methods that read them.
     self.__stamped  = StampedSource()
     self.__resolver = Resolver()
Exemplo n.º 2
0
class UniqueSearchResultConstraint(ASearchResultConstraint):
    
    """
        Represents a single uniqueness constraint with respect to entity search 
        results, providing the ability to verify that the constraint is 
        satisfied via the validate function.
    """
    
    def __init__(self):
        """
            Runs the base constraint initializer and creates the StampedSource 
            and Resolver helpers that validate relies on.
        """
        ASearchResultConstraint.__init__(self)
        
        self.__stamped  = StampedSource()
        self.__resolver = Resolver()
    
    def validate(self, results):
        """
            Validates the search result set to ensure that there are no obvious 
            duplicate results.
            
            Three checks are applied, in this order:
              1. no result's proxy is reported as resolved by the resolver 
                 against the other same-kind proxies in the set;
              2. no '*_id' source key carries the same value in two results;
              3. no two results share kind, title and at least one type.
            
            results -- indexable, sized sequence of search result entities.
            
            Returns True if all results are unique within a fuzzy margin of error 
            or False otherwise.
        """
        
        # build a real list: the proxies are iterated once per result below, and 
        # a lazy map object (Python 3) would neither be sized nor re-iterable
        to_proxy = self.__stamped.proxyFromEntity
        proxies  = [ to_proxy(result) for result in results ]
        
        def same_kind_candidates(index, target):
            # index and target are bound per call, so every generator stays tied 
            # to the proxy it was created for instead of to the loop variables
            for other_index, other in enumerate(proxies):
                if other_index != index and target.kind == other.kind:
                    yield other
        
        # ensure that no result resolves definitively to any other result in the result set
        for index, proxy in enumerate(proxies):
            candidates = generatorSource(same_kind_candidates(index, proxy))
            dups       = self.__resolver.resolve(proxy, candidates, count=1)
            
            # only the single best match was requested (count=1); its first 
            # element is read as a mapping holding the 'resolved' flag
            if len(dups) > 0 and dups[0][0]['resolved']:
                return False
        
        # source key -> set of stringified ids already encountered for that key
        seen = defaultdict(set)
        
        # ensure that there are no obvious duplicate results without using the resolver
        for index, result in enumerate(results):
            id_keys = [ k for k in result.sources if k.endswith('_id') ]
            
            # ensure that the same source id doesn't appear twice in the result set
            # (source ids are supposed to be unique)
            for key in id_keys:
                value = str(result[key])
                
                if value in seen[key]:
                    return False
                
                seen[key].add(value)
            
            # compare against later results only; earlier pairs were already 
            # covered, so no explicit index inequality test is required
            for other_index in range(index + 1, len(results)):
                other = results[other_index]
                
                if not (self._eq(result.kind, other.kind) and self._eq(result.title, other.title)):
                    continue
                
                if len(result.types.intersection(other.types)) > 0:
                    utils.log("")
                    utils.log("!" * 80)
                    utils.log("dupe encountered: %s\n%s" % (result, other))
                    utils.log("!" * 80)
                    utils.log("")
                    
                    return False
        
        return True