Exemplo n.º 1
0
    def load_vocabulary(self, transform_manager, prefix, uri):
        """Fetch the vocabulary at *uri* and upload it to the transform
        manager's store under a graph named after *prefix*.

        Entries in ``settings.VOCABULARY_URL_OVERRIDES`` take precedence
        over the supplied *uri*; a falsy override disables the vocabulary
        entirely.
        """
        overrides = getattr(settings, "VOCABULARY_URL_OVERRIDES", {})
        uri = overrides.get(prefix, uri)
        if not uri:
            # Explicitly disabled via a falsy override.
            return

        filename, headers = retrieve(uri)

        if not filename:
            logger.error("Unable to retrieve: %s", headers.get("message"))
            return

        try:

            logger.debug("About to fetch %r for vocabulary %r", uri, prefix)

            if headers["status"] != httplib.OK:
                logger.error("Failed to retrieve %r for vocabulary %r", uri, prefix, extra={"headers": headers})
                return
            # Strip any parameters (e.g. charset) before checking the media type.
            content_type = headers["content-type"].split(";")[0]
            if content_type not in ("application/rdf+xml", "text/n3", "text/plain", "text/turtle"):
                logger.error("Unexpected content-type: %r", content_type)
                return

            graph_name = settings.GRAPH_BASE + "vocabulary/" + prefix
            Uploader.upload(
                stores=(transform_manager.store,), graph_name=graph_name, filename=filename, mimetype=content_type
            )
        finally:
            # Downloads flagged 'delete-after' by retrieve() are temporary;
            # remove them whether or not the upload happened.
            if headers["delete-after"]:
                os.unlink(filename)
Exemplo n.º 2
0
def accept_pingback(pingback):
    """Publish the data carried by *pingback* to its store, then mark it published."""
    Uploader().upload(store=pingback.store,
                      graph_name=pingback.graph_name,
                      data=pingback.data,
                      mimetype='text/n3')
    pingback.mark_published()
Exemplo n.º 3
0
def accept_pingback(pingback):
    """Upload a pingback's N3 payload to its target store and record it as published."""
    upload_kwargs = {
        'store': pingback.store,
        'graph_name': pingback.graph_name,
        'data': pingback.data,
        'mimetype': 'text/n3',
    }
    uploader = Uploader()
    uploader.upload(**upload_kwargs)

    pingback.mark_published()
Exemplo n.º 4
0
    def execute(self, transform_manager, input):
        """Parse the RDF document *input*, add dcterms:created and
        dcterms:modified statements about the graph, serialize it, and
        upload the result to the transform manager's store.

        Raises KeyError when *input* has an unrecognized RDF extension.
        """
        transform_manager.start(self, [input])

        logger.debug("Starting upload of %r", input)

        # NOTE(review): the original bound this to an unused local; the call
        # is kept in case get_redis_client() has side effects — confirm.
        self.get_redis_client()

        extension = input.rsplit('.', 1)[-1]
        try:
            serializer = self.formats[extension]
        except KeyError:
            logger.exception("Unrecognized RDF extension: %r", extension)
            raise

        graph = rdflib.ConjunctiveGraph()
        # Close the file promptly; the original leaked the handle until GC.
        with open(input, 'r') as source:
            graph.parse(source,
                        format=serializer,
                        publicID=self.graph_name)

        logger.debug("Parsed graph")

        datetime_now = self.site_timezone.localize(datetime.datetime.now().replace(microsecond=0))
        # Fall back to "now" when the data carries no modified date.
        modified = graph.value(self.graph_name, NS['dcterms'].modified,
                               default=rdflib.Literal(datetime_now))
        created = graph.value(self.graph_name, NS['dcterms'].created)
        if not created:
            # Reuse the created date already recorded in the store, if any;
            # otherwise treat this upload as the creation.
            logger.debug("Getting created date from %r", transform_manager.store.query_endpoint)
            endpoint = Endpoint(transform_manager.store.query_endpoint)
            results = list(endpoint.query(self.created_query % {'graph': self.graph_name.n3()}))
            if results:
                created = results[0].date
            else:
                created = modified

        graph += (
            (self.graph_name, NS['dcterms'].modified, modified),
            (self.graph_name, NS['dcterms'].created, created),
        )

        logger.debug("About to serialize")

        output = transform_manager('rdf')
        with open(output, 'w') as f:
            graph.serialize(f)

        logger.debug("Serialization done; about to upload")

        uploader = Uploader()
        uploader.upload(stores=(transform_manager.store,),
                        graph_name=self.graph_name,
                        filename=output,
                        method=self.method,
                        mimetype='application/rdf+xml')

        logger.debug("Upload complete")

        transform_manager.end([self.graph_name])
        transform_manager.touched_graph(self.graph_name)
Exemplo n.º 5
0
    def archive(self):
        notation = self.notation or hashlib.sha1(self.dataset).hexdigest()

        archive_path = os.path.join(SOURCE_DIRECTORY, 'archive', self.store.slug, notation.replace('/', '-'))
        archive_graph_name = rdflib.URIRef('{0}archive/{1}'.format(settings.GRAPH_BASE, notation))
        data_dump_url = rdflib.URIRef('{0}archive/{1}/{2}/latest.rdf'.format(SOURCE_URL, self.store.slug, notation.replace('/', '-')))

        if not os.path.exists(archive_path):
            os.makedirs(archive_path, 0755)

        nt_fd, nt_name = tempfile.mkstemp('.nt')
        rdf_fd, rdf_name = tempfile.mkstemp('.rdf')
        try:
            nt_out, rdf_out = os.fdopen(nt_fd, 'w'), os.fdopen(rdf_fd, 'w')
            for graph_name in self.graph_names:
                self._graph_triples(nt_out, graph_name)
            nt_out.close()

            sort = subprocess.Popen(['sort', '-u', nt_name], stdout=subprocess.PIPE)
            try:
                triples = itertools.chain(self._get_metadata(rdflib.URIRef(''),
                                                             archive_graph_name),
                                          parse(sort.stdout, 'nt').get_triples())
                serialize(triples, rdf_out, rdf_name)
            finally:
                # Make sure stdout gets closed so that if the try block raises
                # an exception we don't keep a sort process hanging around.
                sort.stdout.close()
                sort.wait()
            rdf_out.close()

            previous_name = os.path.join(archive_path, 'latest.rdf')
            # Only update if the file has changed, or hasn't been archived before.
            if not os.path.exists(previous_name) or not filecmp._do_cmp(previous_name, rdf_name):
                new_name = os.path.join(archive_path,
                                        self.updated.astimezone(pytz.utc).isoformat() + '.rdf')
                shutil.move(rdf_name, new_name)
                os.chmod(new_name, 0644)
                if os.path.exists(previous_name):
                    os.unlink(previous_name)
                os.symlink(new_name, previous_name)

                # Upload the metadata to the store using an absolute URI.
                metadata = self._get_metadata(data_dump_url, archive_graph_name)
                Uploader.upload([self.store], archive_graph_name, graph=metadata)
        finally:
            os.unlink(nt_name)
            if os.path.exists(rdf_name):
                os.unlink(rdf_name)
            self.filter_old_archives(archive_path)
Exemplo n.º 6
0
    def load_vocabulary(self, transform_manager, prefix, uri):
        """Download the vocabulary identified by *prefix* from *uri* (or
        its configured override) and push it into the transform manager's
        store.
        """
        uri = getattr(settings, 'VOCABULARY_URL_OVERRIDES', {}).get(prefix, uri)
        if not uri:
            return

        filename, headers = retrieve(uri)
        if not filename:
            logger.error("Unable to retrieve: %s", headers.get('message'))
            return

        try:
            logger.debug("About to fetch %r for vocabulary %r", uri, prefix)

            if headers['status'] != httplib.OK:
                logger.error("Failed to retrieve %r for vocabulary %r",
                             uri, prefix, extra={'headers': headers})
                return

            # Ignore any parameters (e.g. charset) on the media type.
            content_type = headers['content-type'].split(';')[0]
            acceptable_types = ('application/rdf+xml', 'text/n3',
                                'text/plain', 'text/turtle')
            if content_type not in acceptable_types:
                logger.error('Unexpected content-type: %r', content_type)
                return

            Uploader.upload(stores=(transform_manager.store, ),
                            graph_name=settings.GRAPH_BASE + 'vocabulary/' + prefix,
                            filename=filename,
                            mimetype=content_type)
        finally:
            # Remove downloads that retrieve() marked as temporary.
            if headers['delete-after']:
                os.unlink(filename)
Exemplo n.º 7
0
    def execute(self, transform_manager, input):
        """Parse the RDF document *input*, add rdf:type sd:Graph plus
        dcterms:created and dcterms:modified statements about the graph,
        serialize it, and upload the result to the transform manager's
        store.

        Raises KeyError when *input* has an unrecognized RDF extension.
        """
        transform_manager.start(self, [input])

        logger.debug("Starting upload of %r", input)

        extension = input.rsplit('.', 1)[-1]
        try:
            serializer = self.formats[extension]
        except KeyError:
            logger.exception("Unrecognized RDF extension: %r", extension)
            raise

        graph = rdflib.ConjunctiveGraph()
        # Close the file promptly; the original leaked the handle until GC.
        with open(input, 'r') as source:
            graph.parse(source,
                        format=serializer,
                        publicID=self.graph_name)

        logger.debug("Parsed graph")

        datetime_now = self.site_timezone.localize(
            datetime.datetime.now().replace(microsecond=0))
        # Fall back to "now" when the data carries no modified date.
        modified = graph.value(self.graph_name,
                               NS['dcterms'].modified,
                               default=rdflib.Literal(datetime_now))
        created = graph.value(self.graph_name, NS['dcterms'].created)
        if not created:
            # Reuse the created date already recorded in the store, if any;
            # otherwise treat this upload as the creation.
            logger.debug("Getting created date from %r",
                         transform_manager.store.query_endpoint)
            endpoint = Endpoint(transform_manager.store.query_endpoint)
            results = list(
                endpoint.query(self.created_query %
                               {'graph': self.graph_name.n3()}))
            if results:
                created = results[0].date
            else:
                created = modified

        graph += (
            (self.graph_name, NS.rdf.type, NS.sd.Graph),
            (self.graph_name, NS.dcterms.modified, modified),
            (self.graph_name, NS.dcterms.created, created),
        )

        logger.debug("About to serialize")

        output = transform_manager('rdf')
        with open(output, 'w') as f:
            graph.serialize(f)

        logger.debug("Serialization done; about to upload")

        uploader = Uploader()
        uploader.upload(stores=(transform_manager.store, ),
                        graph_name=self.graph_name,
                        filename=output,
                        method=self.method,
                        mimetype='application/rdf+xml')

        logger.debug("Upload complete")

        transform_manager.end([self.graph_name])
        transform_manager.touched_graph(self.graph_name)
Exemplo n.º 8
0
    def archive(self):
        notation = self.notation or hashlib.sha1(self.dataset).hexdigest()

        archive_path = os.path.join(SOURCE_DIRECTORY, 'archive', self.store.slug, notation.replace('/', '-'))
        archive_graph_name = rdflib.URIRef('{0}archive/{1}'.format(settings.GRAPH_BASE, notation))
        data_dump_url = rdflib.URIRef('{0}archive/{1}/{2}/latest.rdf'.format(SOURCE_URL, self.store.slug, notation.replace('/', '-')))
        data_dump_with_labels_url = rdflib.URIRef('{0}archive/{1}/{2}/latest-with-labels.rdf'.format(SOURCE_URL, self.store.slug, notation.replace('/', '-')))

        if not os.path.exists(archive_path):
            os.makedirs(archive_path, 0755)

        nt_fd, nt_name = tempfile.mkstemp('.nt')
        rdf_fd, rdf_name = tempfile.mkstemp('.rdf')
        rdf_with_labels_fd, rdf_with_labels_name = tempfile.mkstemp('.rdf')
        try:
            nt_out, rdf_out = os.fdopen(nt_fd, 'w'), os.fdopen(rdf_fd, 'w')
            rdf_with_labels_out = os.fdopen(rdf_with_labels_fd, 'w')
            for graph_name in self.graph_names:
                self._graph_triples(nt_out, graph_name)
            nt_out.close()

            with tempfile.TemporaryFile() as sorted_triples:
                subprocess.call(['sort', '-u', nt_name], stdout=sorted_triples)

                sorted_triples.seek(0)
                triples = itertools.chain(self._get_metadata(rdflib.URIRef(''),
                                                             data_dump_with_labels_url,
                                                             archive_graph_name),
                                          parse(sorted_triples, 'nt').get_triples())
                serialize(triples, rdf_out, 'rdf')
                rdf_out.close()

                sorted_triples.seek(0)
                triples = itertools.chain(self._get_metadata(rdflib.URIRef(''),
                                                             data_dump_with_labels_url,
                                                             archive_graph_name),
                                          self.with_labels(parse(sorted_triples, 'nt').get_triples()))
                serialize(triples, rdf_with_labels_out, 'rdf')
                rdf_with_labels_out.close()

            previous_name = os.path.join(archive_path, 'latest.rdf')
            # Only update if the file has changed, or hasn't been archived before.
            if not os.path.exists(previous_name) or not filecmp._do_cmp(previous_name, rdf_name):
                new_name = os.path.join(archive_path,
                                        self.updated.astimezone(pytz.utc).isoformat() + '.rdf')
                shutil.move(rdf_name, new_name)
                os.chmod(new_name, 0644)
                if os.path.exists(previous_name):
                    os.unlink(previous_name)
                os.symlink(new_name, previous_name)

                new_with_labels_name = os.path.join(archive_path, 'latest-with-labels.rdf')
                shutil.move(rdf_with_labels_name, new_with_labels_name)
                os.chmod(new_with_labels_name, 0644)

                # Upload the metadata to the store using an absolute URI.
                metadata = self._get_metadata(data_dump_url, data_dump_with_labels_url, archive_graph_name)
                Uploader.upload([self.store], archive_graph_name, graph=metadata)
        finally:
            os.unlink(nt_name)
            if os.path.exists(rdf_name):
                os.unlink(rdf_name)
            self.filter_old_archives(archive_path)