示例#1
0
文件: resource.py 项目: dmr/Ldtools
    def get(self, uri, origin=None):
        """Return the Resource identified by ``uri``, optionally scoped to
        ``origin``.

        If no origin is given, the lookup only succeeds when exactly one
        stored Resource has the given uri; otherwise DoesNotExist is
        raised. Assumption is to only trust validated sources, so an
        ambiguous lookup is rejected instead of guessed.

        Alternative: this could point to the source with most parameters
        given etc. because the user may want to just have the Resource with
        most content/know everything about a uri.
        --> If DoesNotExist occurs, Resources with that uri might still
        exist but no validated Resource exists.

        :param uri: URI of the Resource; converted with
            ``get_rdflib_uriref`` before the lookup.
        :param origin: Origin providing the Resource. When falsy, all
            stored Resources are searched by uri instead.
        :raises self.model.DoesNotExist: if no origin is given and either
            no Resource or more than one Resource matches ``uri``.
        """
        uri = get_rdflib_uriref(uri)

        if origin:
            # Exact lookup: the primary key is derived from origin and uri.
            pk = self.get_pk(origin_uri=origin.uri, uri=uri)
            return super(ResourceManager, self).get(pk=pk)

        matches = list(self.filter(_uri=uri))

        if not matches:
            raise self.model.DoesNotExist

        if len(matches) == 1:
            # return only match
            return matches[0]

        # Several origins provide this uri: refuse to pick one silently and
        # tell the caller which origins are available.
        raise self.model.DoesNotExist(
            "Please pass the exact "
            "Origin. The Resource you are looking for is "
            "provided by the Origins: %s"
            % ", ".join(unicode(r._origin.uri) for r in matches)
        )
示例#2
0
文件: test_url.py 项目: dmr/Ldtools
 def test_get_rdflib_uriref_result(self):
     """Check that get_rdflib_uriref turns each sample string into the
     expected rdflib.URIRef."""
     # (input string, expected URIRef) pairs
     expectations = [
         ("http://web.de/test?query=bla",
          rdflib.URIRef("http://web.de/test?query=bla")),
     ]
     for raw_uri, expected in expectations:
         converted = get_rdflib_uriref(raw_uri)
         # The input is used as failure message to identify the bad case.
         self.assertEqual(converted, expected, msg=raw_uri)
示例#3
0
文件: resource.py 项目: dmr/Ldtools
    def create(self, uri, origin, **kwargs):
        """Create a Resource for ``uri`` that belongs to ``origin``.

        :param uri: URI of the new Resource; converted with
            ``get_rdflib_uriref``.
        :param origin: Origin the Resource belongs to. Must not be None and
            must already be processed (``origin.processed`` truthy).
        :param kwargs: forwarded unchanged to the parent manager's
            ``create``.
        :returns: whatever the parent manager's ``create`` returns.
        """
        # NOTE: there is no isinstance(origin, Origin) check here because
        # importing ldtools.origin from this module is an import circle
        # problem.
        assert origin is not None
        assert origin.processed, (
            "Origin has to be processed before creating more Resource "
            "objects: origin.GET()")

        resource_uri = get_rdflib_uriref(uri)
        resource_pk = self.get_pk(origin_uri=origin.uri, uri=resource_uri)

        parent_manager = super(ResourceManager, self)
        return parent_manager.create(
            pk=resource_pk, _uri=resource_uri, _origin=origin, **kwargs)
示例#4
0
文件: origin.py 项目: dmr/Ldtools
    def create(self, uri, BACKEND=None):
        """Create an Origin for ``uri`` and run ``post_create_hook`` on it.

        The uri is converted with ``get_rdflib_uriref`` and replaced by the
        result of ``get_slash_url`` when the two differ.

        :param uri: URI of the Origin to create.
        :param BACKEND: backend object to attach to the Origin; a new
            ``RestBackend()`` is used when this is falsy.
        :returns: the result of ``self.post_create_hook(origin)``.
        """
        uri = get_rdflib_uriref(uri)
        slash_uri = get_slash_url(uri)
        if not uri == slash_uri:
            logger.debug(
                "URI passed to Origin Manager was not a slash URI: %s. "
                "Fixed now." % uri)
            uri = slash_uri

        if BACKEND:
            backend = BACKEND
        else:
            backend = RestBackend()

        # The (normalized) uri doubles as the primary key of the Origin.
        new_origin = super(OriginManager, self).create(
            pk=uri, uri=uri, backend=backend)
        return self.post_create_hook(new_origin)
示例#5
0
文件: origin.py 项目: dmr/Ldtools
    def get_or_create(self, uri, **kwargs):
        """Return ``(origin, created)`` for ``uri``.

        The uri is converted with ``get_rdflib_uriref`` and replaced by the
        result of ``get_slash_url`` when the two differ. An existing Origin
        is returned with ``created`` False; when ``self.get`` raises
        DoesNotExist a new one is created and returned with ``created``
        True.

        :param uri: URI of the Origin.
        :param kwargs: only used for creation; ignored (with a warning)
            by the lookup.
        """
        uri = get_rdflib_uriref(uri)
        slash_uri = get_slash_url(uri)
        if not uri == slash_uri:
            logger.warning(
                "URI passed to Origin Manager was not a slash URI: %s. "
                "Fixed now." % uri)
            uri = slash_uri

        if kwargs:
            logger.warning("kwargs are ignored for get.")

        try:
            existing = self.get(uri)
        except self.model.DoesNotExist:
            return self.create(uri, **kwargs), True
        return existing, False
示例#6
0
文件: tools.py 项目: dmr/Ldtools
def get_authoritative_resource(uri, create_nonexistent_origin=True):
    """Try to return the Resource for ``uri`` from its authoritative Origin.

    The authoritative Origin is the one whose uri equals
    ``get_slash_url(uri)``.

    :param uri: URI of the wanted Resource.
    :param create_nonexistent_origin: when true, the Origin is obtained via
        ``Origin.objects.get_or_create`` if the filter does not yield
        exactly one Origin.
    :raises Resource.DoesNotExist: if the filter does not yield exactly one
        Origin and ``create_nonexistent_origin`` is false.
    """
    uri = get_rdflib_uriref(uri)
    origin_uri = get_slash_url(uri)

    candidates = list(Origin.objects.filter(uri=origin_uri))
    if len(candidates) == 1:
        origin = candidates[0]
    elif create_nonexistent_origin:
        origin, _created = Origin.objects.get_or_create(uri=origin_uri)
    else:
        raise Resource.DoesNotExist(
            "No authoritative Resource found for %s" % uri)

    # Only fetch the origin's content when it has no unsaved changes;
    # no further URIs are followed and errors are not raised.
    if not origin.has_unsaved_changes():
        origin.GET(only_follow_uris=[], raise_errors=False)

    return Resource.objects.get(uri=uri, origin=origin)
示例#7
0
文件: origin.py 项目: dmr/Ldtools
 def get(self, uri, **kwargs):
     """Retrieve the Origin object stored under ``uri``.

     ``uri`` is converted with ``get_rdflib_uriref`` and used as primary
     key for the lookup. Extra keyword arguments are accepted but unused.
     """
     origin_pk = get_rdflib_uriref(uri)
     parent_manager = super(OriginManager, self)
     return parent_manager.get(pk=origin_pk)
示例#8
0
文件: cli.py 项目: dmr/Ldtools
def _pprint_resource_dict(resource):
    """Pretty-print ``resource.__dict__`` without ``_has_changes``/``pk``.

    The two attributes are deleted from the object itself (if present)
    before printing.
    """
    for attr_name in ("_has_changes", "pk"):
        if hasattr(resource, attr_name):
            delattr(resource, attr_name)
    pprint.pprint(resource.__dict__)


def execute_ldtools(
    verbosity,
    origin_urls,
    depth,
    follow_all,
    follow_uris,
    socket_timeout,
    GRAPH_SIZE_LIMIT,
    print_all_resources,
    only_print_uris,
    only_print_uri_content,
    only_negotiate
):
    """Retrieve the given origin URLs and print the information found.

    :param verbosity: passed to ``set_colored_logger``.
    :param origin_urls: sized collection of URLs to retrieve.
    :param depth: number of additional rounds in which ``GET`` is called on
        every known Origin; falsy for none.
    :param follow_all: when true, ``only_follow_uris=None`` is passed to
        ``Origin.GET``.
    :param follow_uris: iterable of strings passed as ``only_follow_uris``
        when ``follow_all`` is false; if both are falsy an empty list is
        passed.
    :param socket_timeout: when truthy, set as the default socket timeout.
    :param GRAPH_SIZE_LIMIT: when truthy, forwarded to ``Origin.GET``.
    :param print_all_resources: when true, print every Resource at the end.
    :param only_print_uris: with ``print_all_resources``, print the
        Resource objects themselves instead of their ``__dict__``.
    :param only_print_uri_content: only fetch each URL through the origin's
        backend, print the returned data, then exit.
    :param only_negotiate: only fetch each URL through the origin's
        backend, then exit.

    Side effects: replaces ``Origin.objects.post_create_hook`` and calls
    ``sys.exit(0)`` in the ``only_negotiate``/``only_print_uri_content``
    modes.
    """
    set_colored_logger(verbosity)

    # customize Origin.objects.post_create_hook for performance reasons
    def custom_post_create_hook(origin):
        origin.timedelta = datetime.timedelta(minutes=5)
        return origin
    Origin.objects.post_create_hook = custom_post_create_hook

    url_count = len(origin_urls)

    if url_count > 1:
        logger.info("Retrieving content of %s URLs" % url_count)

    # Use the module logger here like everywhere else in this function
    # (these two messages previously went through the root ``logging``
    # functions).
    if follow_all:
        only_follow_uris = None
        logger.info("Following all URIs")
    elif follow_uris:
        only_follow_uris = follow_uris
        logger.info("Following values matching: %s"
                    % ", ".join(only_follow_uris))
    else:
        only_follow_uris = []

    if socket_timeout:
        import socket
        logger.info("Setting socket timeout to %s" % socket_timeout)
        socket.setdefaulttimeout(socket_timeout)

    kw = dict(raise_errors=False)
    if GRAPH_SIZE_LIMIT:
        kw["GRAPH_SIZE_LIMIT"] = GRAPH_SIZE_LIMIT

    backend_only = only_negotiate or only_print_uri_content

    for url in origin_urls:
        url = get_slash_url(url)
        origin, created = Origin.objects.get_or_create(url)
        logger.info("Retrieving content of %s" % origin.uri)

        if backend_only:
            try:
                data = origin.backend.GET(
                    uri=origin.uri,
                    httphandler=urllib2.HTTPHandler(debuglevel=1))
            except Exception as exc:
                # Best effort: report the failure and go on with the next
                # URL.
                print(exc)
                continue
            if only_print_uri_content:
                print('\n', data, '\n')
        else:
            origin.GET(only_follow_uris=only_follow_uris, **kw)

    if backend_only:
        sys.exit(0)

    if depth:
        for _depth_round in range(depth):
            for origin in Origin.objects.all():
                origin.GET(only_follow_uris=only_follow_uris, **kw)

    for orig_url in origin_urls:
        url = get_slash_url(orig_url)
        origin = Origin.objects.get(url)
        wanted_uri = get_rdflib_uriref(orig_url)
        for r in origin.get_resources():
            if r._uri == wanted_uri:
                logger.info(u"Printing all available information "
                    "about {0}".format(r._uri))
                _pprint_resource_dict(r)

    if print_all_resources:
        all_resources = Resource.objects.all()
        if only_print_uris:
            for resource in all_resources:
                print(resource)
        else:
            for r in all_resources:
                _pprint_resource_dict(r)
示例#9
0
文件: resource.py 项目: dmr/Ldtools
 def get_or_create(self, uri, origin=None):
     """Return ``(resource, created)`` for ``uri`` and ``origin``.

     The Resource is looked up with ``self.get``; when that raises
     DoesNotExist it is created with ``self.create`` instead and
     ``created`` is True.
     """
     resource_uri = get_rdflib_uriref(uri)
     try:
         existing = self.get(uri=resource_uri, origin=origin)
     except self.model.DoesNotExist:
         return self.create(uri=resource_uri, origin=origin), True
     return existing, False