示例#1
0
文件: cli.py 项目: dmr/Ldtools
def _pprint_resource(resource):
    """Pretty-print the attribute dict of ``resource``.

    The attributes ``_has_changes`` and ``pk`` are removed from the object
    (when present) before printing so they do not appear in the output.
    Note that this mutates ``resource``.
    """
    for attr_name in ("_has_changes", "pk"):
        if hasattr(resource, attr_name):
            delattr(resource, attr_name)
    pprint.pprint(resource.__dict__)


def execute_ldtools(
    verbosity,
    origin_urls,
    depth,
    follow_all,
    follow_uris,
    socket_timeout,
    GRAPH_SIZE_LIMIT,
    print_all_resources,
    only_print_uris,
    only_print_uri_content,
    only_negotiate
):
    """Retrieve the given origin URLs and print what was found.

    Parameters:
        verbosity: passed to ``set_colored_logger``.
        origin_urls: sized collection of URLs to retrieve.
        depth: number of extra rounds in which ``GET`` is called on every
            known origin; a falsy value skips these rounds.
        follow_all: when truthy, ``only_follow_uris=None`` is passed to
            ``Origin.GET`` (logged as "Following all URIs").
        follow_uris: sequence of strings passed as ``only_follow_uris``
            when ``follow_all`` is falsy; if both are falsy an empty list
            is passed.
        socket_timeout: when truthy, installed as the process-wide default
            socket timeout.
        GRAPH_SIZE_LIMIT: when truthy, forwarded to ``Origin.GET`` as the
            ``GRAPH_SIZE_LIMIT`` keyword.
        print_all_resources: when truthy, every ``Resource`` is printed at
            the end.
        only_print_uris: with ``print_all_resources``, print each resource
            itself instead of its attribute dict.
        only_print_uri_content: fetch each URL through the origin's
            backend, print the returned data and exit.
        only_negotiate: fetch each URL through the origin's backend
            without printing the data and exit.

    Raises:
        SystemExit: via ``sys.exit(0)`` after the fetch loop when
            ``only_negotiate`` or ``only_print_uri_content`` is truthy.

    Side effects: replaces ``Origin.objects.post_create_hook`` and may
    change the global default socket timeout.
    """
    set_colored_logger(verbosity)

    # customize Origin.objects.post_create_hook for performance reasons
    def custom_post_create_hook(origin):
        origin.timedelta = datetime.timedelta(minutes=5)
        return origin
    Origin.objects.post_create_hook = custom_post_create_hook

    url_count = len(origin_urls)
    if url_count > 1:
        logger.info("Retrieving content of %s URLs", url_count)

    if follow_all:
        only_follow_uris = None
        logger.info("Following all URIs")
    elif follow_uris:
        only_follow_uris = follow_uris
        logger.info("Following values matching: %s",
                    ", ".join(only_follow_uris))
    else:
        only_follow_uris = []

    if socket_timeout:
        import socket
        logger.info("Setting socket timeout to %s", socket_timeout)
        socket.setdefaulttimeout(socket_timeout)

    kw = dict(raise_errors=False)
    if GRAPH_SIZE_LIMIT:
        kw["GRAPH_SIZE_LIMIT"] = GRAPH_SIZE_LIMIT

    # In these modes the URLs are only fetched through the backend and the
    # process exits before any resources are printed.
    fetch_only = only_negotiate or only_print_uri_content

    for url in origin_urls:
        origin, _created = Origin.objects.get_or_create(get_slash_url(url))
        logger.info("Retrieving content of %s", origin.uri)

        if not fetch_only:
            origin.GET(only_follow_uris=only_follow_uris, **kw)
            continue

        try:
            data = origin.backend.GET(
                uri=origin.uri,
                httphandler=urllib2.HTTPHandler(debuglevel=1))
        except Exception as exc:
            # Deliberate best effort: report the failure and carry on
            # with the remaining URLs.
            print(exc)
            continue
        if only_print_uri_content:
            print('\n', data, '\n')

    if fetch_only:
        sys.exit(0)

    if depth:
        for _ in range(depth):
            for origin in Origin.objects.all():
                origin.GET(only_follow_uris=only_follow_uris, **kw)

    for orig_url in origin_urls:
        origin = Origin.objects.get(get_slash_url(orig_url))
        # Loop-invariant: compute the reference to compare against once.
        wanted_uri = get_rdflib_uriref(orig_url)
        for r in origin.get_resources():
            if r._uri == wanted_uri:
                logger.info(
                    u"Printing all available information about %s", r._uri)
                _pprint_resource(r)

    if print_all_resources:
        all_resources = Resource.objects.all()
        if only_print_uris:
            for resource in all_resources:
                print(resource)
        else:
            for r in all_resources:
                _pprint_resource(r)
示例#2
0
文件: dbpedia.py 项目: dmr/Ldtools
# -*- coding: utf-8 -*-
from __future__ import print_function

import rdflib

from ldtools.origin import Origin
from ldtools.resource import Resource
from ldtools.utils import get_slash_url
from ldtools.helpers import set_colored_logger

# Module-level logger as returned by set_colored_logger (called with 2,
# presumably a verbosity level).
logger = set_colored_logger(2)


def count_things():
    """Return the current ``(origin count, resource count)`` pair."""
    return len(Origin.objects.all()), len(Resource.objects.all())


# Keyword arguments limiting link-following to rdfs:seeAlso and owl:sameAs
# and enabling handling of OWL imports. Presumably forwarded to
# ``Origin.GET`` -- the call site is outside this view.
GET_kw = dict(
    only_follow_uris=[rdflib.RDFS.seeAlso, rdflib.OWL.sameAs],
    handle_owl_imports=True,
)


from functools import wraps


def log_resource_n_origin_diff(func):
    """
    Logs hwo the storage's content changed during operation
    """
    @wraps(func)
    def new_func(*args, **kwargs):