def _pprint_resource(resource):
    """Pretty-print a resource's instance attributes without bookkeeping.

    The ``_has_changes`` and ``pk`` entries are left out of the output.
    A filtered copy of ``resource.__dict__`` is printed, so the resource
    object itself is not modified by being displayed.
    """
    hidden = ("_has_changes", "pk")
    visible = dict(
        (key, value) for key, value in resource.__dict__.items()
        if key not in hidden
    )
    pprint.pprint(visible)


def execute_ldtools(
    verbosity,
    origin_urls,
    depth,
    follow_all,
    follow_uris,
    socket_timeout,
    GRAPH_SIZE_LIMIT,
    print_all_resources,
    only_print_uris,
    only_print_uri_content,
    only_negotiate
):
    """Retrieve the given origin URLs and print the resources found.

    Arguments:
        verbosity: passed to ``set_colored_logger``.
        origin_urls: sequence of URLs to retrieve.
        depth: number of additional rounds in which every known origin is
            retrieved again; a falsy value skips these rounds.
        follow_all: if truthy, ``only_follow_uris=None`` is passed to
            ``origin.GET`` (presumably meaning "no restriction" -- confirm
            against ``Origin.GET``).
        follow_uris: values passed as ``only_follow_uris`` when
            ``follow_all`` is falsy; if both are falsy an empty list is
            passed.
        socket_timeout: if truthy, installed as the process-wide default
            via ``socket.setdefaulttimeout``.
        GRAPH_SIZE_LIMIT: if truthy, forwarded to ``origin.GET`` as the
            keyword argument of the same name.
        print_all_resources: if truthy, every object returned by
            ``Resource.objects.all()`` is printed at the end.
        only_print_uris: with ``print_all_resources``, print each resource
            itself instead of pretty-printing its attributes.
        only_print_uri_content: fetch each URL through
            ``origin.backend.GET``, print the returned data, then exit.
        only_negotiate: fetch each URL through ``origin.backend.GET``
            without printing the data, then exit.

    Raises:
        SystemExit: with status 0 after all URLs were fetched when
            ``only_negotiate`` or ``only_print_uri_content`` is truthy.
    """
    set_colored_logger(verbosity)

    # customize Origin.objects.post_create_hook for performance reasons
    def custom_post_create_hook(origin):
        origin.timedelta = datetime.timedelta(minutes=5)
        return origin
    Origin.objects.post_create_hook = custom_post_create_hook

    url_count = len(origin_urls)
    if url_count > 1:
        logger.info("Retrieving content of %s URLs", url_count)

    # All messages go through the module ``logger`` so they share the
    # configuration applied by set_colored_logger above.
    if follow_all:
        only_follow_uris = None
        logger.info("Following all URIs")
    elif follow_uris:
        only_follow_uris = follow_uris
        logger.info(
            "Following values matching: %s", ", ".join(only_follow_uris))
    else:
        only_follow_uris = []

    if socket_timeout:
        import socket
        logger.info("Setting socket timeout to %s", socket_timeout)
        socket.setdefaulttimeout(socket_timeout)

    kw = dict(raise_errors=False)
    if GRAPH_SIZE_LIMIT:
        kw["GRAPH_SIZE_LIMIT"] = GRAPH_SIZE_LIMIT

    # In these two modes the backend is queried directly and the program
    # exits once every URL has been handled.
    fetch_only = only_negotiate or only_print_uri_content

    for url in origin_urls:
        origin, _created = Origin.objects.get_or_create(get_slash_url(url))
        logger.info("Retrieving content of %s", origin.uri)

        if not fetch_only:
            origin.GET(only_follow_uris=only_follow_uris, **kw)
            continue

        try:
            data = origin.backend.GET(
                uri=origin.uri,
                httphandler=urllib2.HTTPHandler(debuglevel=1))
        except Exception as exc:
            # Best effort: report the failure and move on to the next URL.
            print(exc)
            continue

        if only_print_uri_content:
            print('\n', data, '\n')

    if fetch_only:
        sys.exit(0)

    if depth:
        for _ in range(depth):
            for origin in Origin.objects.all():
                origin.GET(only_follow_uris=only_follow_uris, **kw)

    for orig_url in origin_urls:
        origin = Origin.objects.get(get_slash_url(orig_url))
        wanted_uri = get_rdflib_uriref(orig_url)
        for resource in origin.get_resources():
            if resource._uri == wanted_uri:
                logger.info(u"Printing all available information "
                            "about {0}".format(resource._uri))
                _pprint_resource(resource)

    if print_all_resources:
        all_resources = Resource.objects.all()
        if only_print_uris:
            for resource in all_resources:
                print(resource)
        else:
            for resource in all_resources:
                _pprint_resource(resource)
# -*- coding: utf-8 -*- from __future__ import print_function import rdflib from ldtools.origin import Origin from ldtools.resource import Resource from ldtools.utils import get_slash_url from ldtools.helpers import set_colored_logger logger = set_colored_logger(2) count_things = lambda: (len(Origin.objects.all()), len(Resource.objects.all())) GET_kw = dict( only_follow_uris=[rdflib.RDFS.seeAlso, rdflib.OWL.sameAs], handle_owl_imports=True, ) from functools import wraps def log_resource_n_origin_diff(func): """ Logs how the storage's content changed during operation """ @wraps(func) def new_func(*args, **kwargs):