Exemplo n.º 1
0
    def create(self, uri, BACKEND=None):
        """Create an Origin for ``uri`` and pass it through the post-create hook.

        The URI is converted with ``get_rdflib_uriref`` and then replaced by
        the value ``get_slash_url`` yields for it; a debug message is logged
        whenever that replacement actually changed the URI.

        :param uri: URI identifying the origin to create.
        :param BACKEND: backend object to attach to the origin. Any falsy
            value (including the default ``None``) results in a freshly
            instantiated ``RestBackend``.
        :returns: the result of ``self.post_create_hook`` for the new origin.
        """
        uri = get_rdflib_uriref(uri)
        slash_uri = get_slash_url(uri)
        if not uri == slash_uri:
            # Report the URI as it was passed in, before it gets replaced.
            logger.debug(
                "URI passed to Origin Manager was not a slash URI: %s. "
                "Fixed now." % uri)
            uri = slash_uri

        if BACKEND:
            backend = BACKEND
        else:
            backend = RestBackend()

        new_origin = super(OriginManager, self).create(
            pk=uri, uri=uri, backend=backend)
        return self.post_create_hook(new_origin)
Exemplo n.º 2
0
    def get_or_create(self, uri, **kwargs):
        """Return ``(origin, created)`` for ``uri``, creating it if missing.

        The URI is converted with ``get_rdflib_uriref`` and replaced by its
        ``get_slash_url`` form (logging a warning when that changed it).

        :param uri: URI identifying the origin.
        :param kwargs: forwarded to ``self.create`` only when no origin
            exists yet; a warning is logged because the lookup ignores them.
        :returns: a 2-tuple of the origin and a boolean that is ``True``
            when the origin had to be created.
        """
        uri = get_rdflib_uriref(uri)
        slash_uri = get_slash_url(uri)
        if not uri == slash_uri:
            # Report the URI as it was passed in, before it gets replaced.
            logger.warning(
                "URI passed to Origin Manager was not a slash URI: %s. "
                "Fixed now." % uri)
            uri = slash_uri

        if kwargs:
            logger.warning("kwargs are ignored for get.")

        try:
            existing = self.get(uri)
        except self.model.DoesNotExist:
            return self.create(uri, **kwargs), True
        return existing, False
Exemplo n.º 3
0
 def test_hash_or_slash_uri_exceptions(self):
     """Check ``get_slash_url`` against (input, expected) URI pairs.

     The list of pairs is currently empty, so the loop body does not run.
     """
     uri_pairs = []
     for raw_uri, expected_uri in uri_pairs:
         print(urlparse.urlparse(raw_uri))
         given = rdflib.URIRef(raw_uri)
         expected = rdflib.URIRef(expected_uri)
         self.assertEqual(get_slash_url(given), expected, msg=given)
Exemplo n.º 4
0
 def is_authoritative_resource(self):
     """Tell whether this resource is authoritative.

     "Authoritative" follows the definition given in
     "SAOR: Authoritative Reasoning for the Web"
     http://www.springerlink.com/content/w47632745gm76x01/

     Returns ``True`` when ``self._uri`` is an ``rdflib.BNode``, or when
     the ``get_slash_url`` form of ``self._uri`` equals the URI of
     ``self._origin``. In every other case ``None`` is returned.
     """
     own_uri = self._uri
     # The BNode check comes first so get_slash_url is never called on one.
     if isinstance(own_uri, rdflib.BNode) or (
             get_slash_url(own_uri) == self._origin.uri):
         return True
     return None
Exemplo n.º 5
0
    def populate_resources(self, graph):
        """Create Resource (and referenced Origin) objects from ``graph``.

        For every ``(subject, predicate, object)`` triple in ``graph``:

        * if ``self.handle_owl_imports`` is set and the triple is an
          ``owl:imports`` statement whose object is a URIRef, the imported
          origin is fetched or created and its ``GET()`` is called right away;
        * if the predicate passes the ``self.only_follow_uris`` filter
          (``None`` means "follow everything") and the object is a URIRef
          accepted by ``is_valid_url``, an Origin for the object's slash URI
          is fetched or created;
        * the Resource for ``subject`` within ``self.origin`` is fetched or
          created and the property is added to it.

        Afterwards the elapsed time is stored in
        ``self.origin.graph_handler_time`` and ``_has_changes`` is reset to
        ``False`` on every resource of ``self.origin``.

        :param graph: iterable of triples that also exposes
            ``namespace_manager.namespaces()``.
        """
        namespace_short_notation_reverse_dict = {
            unicode(rdflib_url): prefix
            for rdflib_url, prefix in reverse_dict(
                safe_dict(dict(graph.namespace_manager.namespaces()))
            ).items()
        }
        reference_time = datetime.datetime.now()

        for subject, predicate, obj_ect in graph:
            assert hasattr(subject, "n3")

            # workaround for rdflib's unicode problems
            assert predicate.encode('utf8')

            obj_is_uriref = isinstance(obj_ect, rdflib.URIRef)

            if (self.handle_owl_imports
                    and predicate == rdflib.OWL.imports
                    and obj_is_uriref):
                uri = get_slash_url(obj_ect)
                origin, _created = Origin.objects.get_or_create(uri=uri)

                logger.info(
                    "Interrupting to process owl:imports %s first",
                    origin.uri)
                origin.GET()

            # only_follow_uris of None means every predicate is followed.
            follow_predicate = (
                self.only_follow_uris is None
                or predicate in self.only_follow_uris)
            # wrong scheme mailto, tel, callto --> should be Literal?
            if follow_predicate and obj_is_uriref and is_valid_url(obj_ect):
                obj_uriref = get_slash_url(obj_ect)
                Origin.objects.get_or_create(uri=obj_uriref)

            resource, _created = Resource.objects.get_or_create(
                uri=subject, origin=self.origin)
            resource._add_property(
                predicate, obj_ect, namespace_short_notation_reverse_dict)

        now = datetime.datetime.now()
        self.origin.graph_handler_time = now - reference_time

        # Everything was just loaded from the graph, so nothing is pending.
        for resource in self.origin.get_resources():
            resource._has_changes = False
Exemplo n.º 6
0
    def test_hash_or_slash_uri_result(self):
        """Check ``get_slash_url`` results for plain-string input URIs.

        Each pair holds the input string and the URI expected back; the
        expected value is wrapped in ``rdflib.URIRef`` before comparing.
        """
        uri_pairs = (
            ("http://creativecommons.org/licenses/by-nc/3.0/",
             "http://creativecommons.org/licenses/by-nc/3.0/"),

            ("http://www.ifrade.es/#frade", "http://www.ifrade.es/"),
        )
        for raw_uri, expected_uri in uri_pairs:
            print(urlparse.urlparse(raw_uri))
            expected = rdflib.URIRef(expected_uri)
            self.assertEqual(get_slash_url(raw_uri), expected, msg=raw_uri)
Exemplo n.º 7
0
Arquivo: tools.py Projeto: dmr/Ldtools
def get_authoritative_resource(uri, create_nonexistent_origin=True):
    """Try to return the Resource for ``uri`` from its authoritative origin.

    The origin is looked up by the ``get_slash_url`` form of ``uri``. Unless
    exactly one matching Origin exists, one is fetched or created when
    ``create_nonexistent_origin`` is true; otherwise
    ``Resource.DoesNotExist`` is raised. If the origin reports no unsaved
    changes, its ``GET`` is called (following no URIs, not raising errors)
    before the resource lookup.

    :param uri: URI of the wanted resource.
    :param create_nonexistent_origin: whether a missing origin may be
        created on the fly.
    :returns: the result of ``Resource.objects.get`` for ``uri`` and the
        origin.
    :raises Resource.DoesNotExist: when no single origin matches and
        creating one is not allowed.
    """
    uri = get_rdflib_uriref(uri)
    origin_uri = get_slash_url(uri)

    candidates = list(Origin.objects.filter(uri=origin_uri))
    if len(candidates) == 1:
        origin = candidates[0]
    elif create_nonexistent_origin:
        origin, _created = Origin.objects.get_or_create(uri=origin_uri)
    else:
        raise Resource.DoesNotExist(
            "No authoritative Resource found for %s" % uri)

    if not origin.has_unsaved_changes():
        origin.GET(only_follow_uris=[], raise_errors=False)

    return Resource.objects.get(uri=uri, origin=origin)
Exemplo n.º 8
0
Arquivo: cli.py Projeto: dmr/Ldtools
def _pprint_resource(resource):
    """Pretty-print ``resource.__dict__`` without bookkeeping attributes.

    The ``_has_changes`` and ``pk`` attributes are deleted from the instance
    (when present) before printing, so the resource object is modified.
    """
    for attr_name in ("_has_changes", "pk"):
        if hasattr(resource, attr_name):
            delattr(resource, attr_name)
    pprint.pprint(resource.__dict__)


def execute_ldtools(
    verbosity,
    origin_urls,
    depth,
    follow_all,
    follow_uris,
    socket_timeout,
    GRAPH_SIZE_LIMIT,
    print_all_resources,
    only_print_uris,
    only_print_uri_content,
    only_negotiate
):
    """Fetch the given origin URLs and print what was found.

    :param verbosity: passed to ``set_colored_logger``.
    :param origin_urls: sized collection of URLs to retrieve.
    :param depth: number of extra rounds in which ``GET`` is called on every
        known Origin; falsy skips these rounds.
    :param follow_all: when true, ``only_follow_uris=None`` is passed to
        ``GET``.
    :param follow_uris: strings passed to ``GET`` as ``only_follow_uris``
        when ``follow_all`` is false; if this is falsy too, an empty list
        is passed instead.
    :param socket_timeout: when truthy, set as the default socket timeout.
    :param GRAPH_SIZE_LIMIT: when truthy, forwarded to ``GET`` under the
        same keyword.
    :param print_all_resources: when true, print every Resource at the end.
    :param only_print_uris: with ``print_all_resources``, print the resource
        objects themselves instead of pretty-printing their ``__dict__``.
    :param only_print_uri_content: fetch each origin through its backend,
        print the returned data and exit.
    :param only_negotiate: fetch each origin through its backend and exit.

    Calls ``sys.exit(0)`` after the retrieval loop when ``only_negotiate``
    or ``only_print_uri_content`` is set.
    """
    set_colored_logger(verbosity)

    # customize Origin.objects.post_create_hook for performance reasons
    def custom_post_create_hook(origin):
        origin.timedelta = datetime.timedelta(minutes=5)
        return origin
    Origin.objects.post_create_hook = custom_post_create_hook

    url_count = len(origin_urls)

    if url_count > 1:
        logger.info("Retrieving content of %s URLs", url_count)

    # Use the module logger here as well, like every other message in this
    # function, instead of the root logger.
    if follow_all:
        only_follow_uris = None
        logger.info("Following all URIs")
    elif follow_uris:
        only_follow_uris = follow_uris
        logger.info("Following values matching: %s",
                    ", ".join(only_follow_uris))
    else:
        only_follow_uris = []

    if socket_timeout:
        import socket
        logger.info("Setting socket timeout to %s", socket_timeout)
        socket.setdefaulttimeout(socket_timeout)

    kw = dict(raise_errors=False)
    if GRAPH_SIZE_LIMIT:
        kw["GRAPH_SIZE_LIMIT"] = GRAPH_SIZE_LIMIT

    backend_only = only_negotiate or only_print_uri_content

    for url in origin_urls:
        url = get_slash_url(url)
        origin, _created = Origin.objects.get_or_create(url)
        logger.info("Retrieving content of %s", origin.uri)

        if backend_only:
            try:
                data = origin.backend.GET(
                    uri=origin.uri,
                    httphandler=urllib2.HTTPHandler(debuglevel=1))
            except Exception as exc:
                # Best effort: report the failure and go on with the next URL.
                print(exc)
                continue
            if only_print_uri_content:
                print('\n', data, '\n')
        else:
            origin.GET(only_follow_uris=only_follow_uris, **kw)

    if backend_only:
        sys.exit(0)

    if depth:
        for _ in range(depth):
            for origin in Origin.objects.all():
                origin.GET(only_follow_uris=only_follow_uris, **kw)

    for orig_url in origin_urls:
        url = get_slash_url(orig_url)
        origin = Origin.objects.get(url)
        for r in origin.get_resources():
            if r._uri == get_rdflib_uriref(orig_url):
                logger.info(u"Printing all available information "
                            "about {0}".format(r._uri))
                _pprint_resource(r)

    if print_all_resources:
        all_resources = Resource.objects.all()
        if only_print_uris:
            for resource in all_resources:
                print(resource)
        else:
            for r in all_resources:
                _pprint_resource(r)
Exemplo n.º 9
0
def get_resource_and_connected_resources(uri):
    """Fetch the origin of ``uri`` and return the Resource for ``uri``.

    The Origin for the ``get_slash_url`` form of ``uri`` is fetched or
    created, its ``GET`` is called with the ``GET_kw`` keyword arguments,
    and the Resource for ``uri`` within that origin is then fetched or
    created and returned.
    """
    origin, _ = Origin.objects.get_or_create(uri=get_slash_url(uri))
    origin.GET(**GET_kw)
    resource, _ = Resource.objects.get_or_create(uri=uri, origin=origin)
    return resource