示例#1
0
    def skipparse(self, dataset, size):
        """Parse dataset as an N-Triples/N3 file, dropping lines that fail.

        dataset must be a file-like object (it needs a ``read`` attribute);
        it is wrapped in a UTF-8 decoding reader. Parsing stops at end of
        input or once ``size`` lines have been parsed successfully; a
        ``size`` of -1 means no limit. Lines raising ParseError or
        UnicodeEncodeError are logged and skipped without being counted.

        Returns ``self.sink``.
        """
        if not hasattr(dataset, 'read'):
            raise ParseError("Item to parse must be a file-like object.")

        # N-Triples 1.1 files can and should be utf-8 encoded
        utf8_reader = codecs.getreader('utf-8')
        self.file = utf8_reader(dataset)
        self.buffer = ''

        nb_lines = 0
        logger.info(f'Start parsing the RDF file... {nb_lines} line written')
        unlimited = size == -1
        while unlimited or nb_lines < size:
            line = self.line = self.readline()
            if line is None:
                # readline signals end of input with None
                break
            try:
                self.parseline()
            except (ParseError, UnicodeEncodeError) as exception:
                logger.warning(
                    f"parse error: dropping {line}. Reason {exception}")
            else:
                nb_lines += 1
                if nb_lines % 10000 == 0:
                    logger.info(
                        f'Parsing the RDF file... {nb_lines} lines written')
        logger.info(f'Parsing complete... {nb_lines} lines written')
        return self.sink
示例#2
0
    def parseline(self) -> Generator:
        """
        Parse the current line and pass the resulting triple to the sink.

        The text in ``self.line`` is consumed piece by piece: leading
        whitespace, subject, predicate, object and the line tail. Blank
        lines and lines starting with ``#`` (comments) are skipped.

        Returns
        -------
        Generator
            The value returned by ``self.sink.triple`` for the parsed
            triple (presumably an iterable of triples, per the annotation
            -- confirm against the sink). Skipped lines return an empty
            iterator so the result can always be iterated.

        Raises
        ------
        ParseError
            If a term cannot be matched or text remains after the triple.

        """
        self.eat(r_wspace)
        if not self.line or self.line.startswith('#'):
            # Nothing to parse: blank line or comment.
            return iter(())

        subject = self.subject()
        self.eat(r_wspaces)

        predicate = self.predicate()
        self.eat(r_wspaces)

        obj = self.object()
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage")
        return self.sink.triple(subject, predicate, obj)
示例#3
0
 def eat(self, pattern):
     """Consume ``pattern`` from the start of ``self.line``.

     On success the matched prefix is removed from ``self.line`` and the
     match object is returned. Raises ParseError when the pattern does
     not match at the current position.
     """
     remaining = self.line
     found = pattern.match(remaining)
     if found:
         # Drop the matched prefix so the next term starts at index 0.
         self.line = remaining[found.end():]
         return found
     raise ParseError('Failed to eat %s at %s' %
                      (pattern.pattern, self.line))
示例#4
0
def parse_literal(strng):
    """Build a Literal from the literal text at the start of ``strng``.

    The text is matched against ``r_literal``; if that fails, a second
    attempt is made with every backslash removed. Raises ParseError when
    neither attempt matches, or when the literal carries both a language
    tag and a datatype.
    """
    match = r_literal.match(strng) or r_literal.match(strng.replace("\\", ""))
    if not match:
        raise ParseError("Failed to eat %s at %s" % (r_literal.pattern, strng))

    text, lang, dtype = match.groups()
    # Normalise empty or missing groups to None.
    lang = lang or None
    dtype = dtype or None
    if lang and dtype:
        raise ParseError("Can't have both a language and a datatype")
    return Literal(unquote(text), lang, dtype)
示例#5
0
 def parseline(self):
     """Parse ``self.line`` as one statement and return its three terms.

     Reads subject, predicate and object in order, consuming the
     separator pattern after each one. Returns the tuple
     ``(subject, predicate, object)``; raises ParseError if any text is
     left on the line afterwards.
     """
     steps = (
         (self.subject, r_wspace),
         (self.predicate, r_wspace),
         (self.object, r_tail),
     )
     terms = []
     for read_term, separator in steps:
         terms.append(read_term())
         self.eat(separator)
     if self.line:
         raise ParseError("Trailing garbage")
     return tuple(terms)
示例#6
0
    def parse(self, inputsource, sink, **kwargs):
        """Parse the byte stream of inputsource as an N-Quads file.

        ``sink`` is stored on ``self.sink`` and must be backed by a
        context-aware store. Every line returned by ``self.readline()`` is
        handed to ``self.parseline()`` until ``readline`` returns None.

        Raises ParseError if the byte stream has no ``read`` attribute, or
        if a line fails to parse; in the latter case the message includes
        the original error and the full offending line.
        """
        assert sink.store.context_aware, ("NQuadsParser must be given"
                                          " a context aware store.")
        self.sink = sink

        source = inputsource.getByteStream()

        if not hasattr(source, 'read'):
            raise ParseError("Item to parse must be a file-like object.")

        self.file = source
        self.buffer = ''
        while True:
            # Keep the untouched line: self.line is what gets parsed, the
            # copy is only used for the error message below.
            self.line = raw_line = self.readline()
            if self.line is None:
                break
            try:
                self.parseline()
            except ParseError as msg:
                raise ParseError("Invalid line (%s):\n%r" % (msg, raw_line))
示例#7
0
文件: nqstream.py 项目: Darnok99/nel
 def iter(self, inputsource):
     """Lazily parse inputsource as an N-Quads file.

     The input is opened via ``create_input_source`` and decoded as
     UTF-8. For every line that is neither empty nor a ``#`` comment, the
     result of ``self.parseline()`` is yielded. A ParseError from
     ``parseline`` is re-raised with the complete original line in its
     message.
     """
     stream = create_input_source(
         source=inputsource, format='nquads').getByteStream()
     if not hasattr(stream, 'read'):
         raise ParseError("Item to parse must be a file-like object.")
     self.file = getreader('utf-8')(stream)
     self.buffer = ''
     while True:
         raw_line = self.line = self.readline()
         if raw_line is None:
             break
         self.eat(r_wspace)
         # Only non-empty, non-comment lines carry a statement.
         if self.line and not self.line.startswith('#'):
             try:
                 yield self.parseline()
             except ParseError as msg:
                 raise ParseError("Invalid line (%s):\n%r" % (msg, raw_line))
示例#8
0
    def parse(self, inputsource, sink, bnode_context=None, **kwargs):
        """
        Read N-Quads statements from inputsource into a conjunctive graph.

        :type inputsource: `rdflib.parser.InputSource`
        :param inputsource: provider of the N-Quads data; its character
                            stream is used when available, otherwise its
                            byte stream decoded as UTF-8
        :type sink: `rdflib.graph.Graph`
        :param sink: graph whose store and identifier back the result;
                     the store must be context aware
        :type bnode_context: `dict`, optional
        :param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances,
                              passed through to ``parseline``.
                              See `.NTriplesParser.parse`
        :return: the `ConjunctiveGraph` assigned to ``self.sink``
        :raises ParseError: if the source is not file-like or a line is
                            invalid
        """
        assert sink.store.context_aware, ("NQuadsParser must be given"
                                          " a context aware store.")
        self.sink = ConjunctiveGraph(
            store=sink.store, identifier=sink.identifier)

        stream = inputsource.getCharacterStream()
        if not stream:
            # No character stream: decode the raw bytes ourselves.
            stream = getreader("utf-8")(inputsource.getByteStream())

        if not hasattr(stream, "read"):
            raise ParseError("Item to parse must be a file-like object.")

        self.file = stream
        self.buffer = ""

        raw_line = self.line = self.readline()
        while raw_line is not None:
            try:
                self.parseline(bnode_context)
            except ParseError as msg:
                raise ParseError("Invalid line (%s):\n%r" % (msg, raw_line))
            raw_line = self.line = self.readline()

        return self.sink
示例#9
0
    def parse(self, filename: str) -> Generator:
        """
        Parse an N-Triples source and yield triples.

        Parameters
        ----------
        filename: str
            Despite its name and annotation, this must be an open
            file-like object (anything with a ``read`` attribute); it is
            wrapped in a UTF-8 decoding reader.

        Returns
        -------
        Generator
            A generator for triples

        Raises
        ------
        ParseError
            Raised during iteration if the argument is not file-like, if
            an empty line is read, or if a line cannot be parsed. In the
            last case the original error is chained as the cause.

        """
        if not hasattr(filename, "read"):
            raise ParseError("Item to parse must be a file-like object.")

        # since N-Triples 1.1 files can and should be utf-8 encoded
        self.file = codecs.getreader("utf-8")(filename)
        self.buffer = ""
        while True:
            # parseline may consume self.line while parsing, so keep the
            # untouched text for error reporting.
            self.line = raw_line = self.readline()
            if raw_line is None:
                break
            if raw_line == "":
                raise ParseError(f"Empty line encountered in {filename}. "
                                 f"Ensure that no leading or trailing empty lines persist "
                                 f"in the N-Triples file.")
            try:
                yield from self.parseline()
            except ParseError as err:
                raise ParseError("Invalid line: %r" % raw_line) from err
示例#10
0
 def get_triples(self):
     """Yield triples parsed line by line from ``self._stream``.

     Each line is stripped, decoded as UTF-8 and handed to a fresh
     ``NTriplesParser_`` whose sink is ``self.Sink(self)``. Whenever
     ``self.triple`` is set after a line has been parsed (presumably by
     the sink -- confirm), it is yielded and reset to None. A line that
     fails to parse raises ParseError.
     """
     self.mode = 'parse'
     nt_parser = NTriplesParser_()
     nt_parser.sink = self.Sink(self)
     self.triple = None
     while True:
         text = self._stream.readline().strip().decode('utf-8')
         nt_parser.line = text
         if not text:
             # NOTE(review): a whitespace-only line strips to '' as well,
             # so it ends iteration just like end of stream -- confirm
             # this is intended.
             return
         try:
             nt_parser.parseline()
         except ParseError:
             raise ParseError("Invalid line: %r" % nt_parser.line)
         if not self.triple:
             continue
         yield self.triple
         self.triple = None
示例#11
0
 def literal(self):
     """Parse a literal at the current position.

     Returns False when the upcoming text does not start with a double
     quote. Otherwise consumes the literal and returns an
     ``rdfuri.Literal`` built from the unquoted text, the datatype and
     the language (empty groups become None). Raises ParseError when both
     a language and a datatype are present.
     """
     if not self.peek('"'):
         return False
     text, lang, dtype = self.eat(r_literal).groups()
     lang = lang or None
     dtype = dtype or None
     if lang and dtype:
         raise ParseError("Can't have both a language and a datatype")
     return rdfuri.Literal(unquote(text), dtype, lang)
示例#12
0
    def parseline(self):
        """Parse ``self.line`` as one N-Quads statement and store it.

        Leading whitespace is consumed first; an empty remainder or one
        starting with ``#`` is ignored. Otherwise subject, predicate,
        object and a URI context are read in order and the triple is added
        to ``self.sink.store`` under that context. Raises ParseError if
        text remains after the statement tail.
        """
        self.eat(r_wspace)
        if not self.line:
            return  # nothing left on the line
        if self.line.startswith(b('#')):
            return  # comment line

        subj = self.subject()
        self.eat(r_wspaces)

        pred = self.predicate()
        self.eat(r_wspaces)

        value = self.object()
        self.eat(r_wspaces)

        graph_name = self.uriref()
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage")
        # A context-aware store is required: adding to a plain Graph
        # discards anything whose context differs from graph.identifier.
        statement = (subj, pred, value)
        self.sink.store.add(statement, graph_name)
示例#13
0
    def skipparse(self, f):
        """Parse f as an N-Triples file, skipping lines that fail to parse.

        ``f`` must be a file-like object (it needs a ``read`` attribute);
        it is wrapped in a UTF-8 decoding reader. Every line returned by
        ``self.readline()`` is passed to ``self.parseline()``. A line that
        raises ParseError is logged as a warning, with the full original
        line and the error message, and then dropped.

        Returns ``self.sink``.
        """
        if not hasattr(f, 'read'):
            raise ParseError("Item to parse must be a file-like object.")

        # since N-Triples 1.1 files can and should be utf-8 encoded
        self.file = codecs.getreader('utf-8')(f)
        self.buffer = ''
        while True:
            # parseline may consume self.line while parsing, so keep the
            # untouched text for the warning below.
            self.line = raw_line = self.readline()
            if raw_line is None:
                break
            try:
                self.parseline()
            except ParseError as exc:
                logger.warning(
                    f"parse error: dropping {raw_line}. Reason {exc}"
                )
        return self.sink
示例#14
0
 def context(self):
     """Return the URI reference read by ``self.uriref()``.

     Raises ParseError when ``uriref`` yields a falsy value, since a
     context must be a URI reference.
     """
     graph_name = self.uriref()
     if graph_name:
         return graph_name
     raise ParseError("Context must be a uriref")