Пример #1
0
def load(fh, as_class=dict,
         tz_aware=True, uuid_subtype=bson.OLD_UUID_SUBTYPE):
    """Lazily decode a stream of concatenated BSON documents.

    Yields one decoded document per BSON object read from `fh`, until
    a clean EOF is reached.

    :Parameters:
      - `fh`: a file-like object supporting ``.read()``
      - `as_class` (optional): the class used for each decoded document
      - `tz_aware` (optional): if ``True``, yield timezone-aware
        :class:`~datetime.datetime` instances
      - `uuid_subtype` (optional): BSON binary subtype expected for
        UUID values
    """
    while True:
        size_bytes = fh.read(S_INT32)
        if not size_bytes:
            # Clean EOF between documents: the stream is exhausted.
            break

        (doc_size,) = struct.unpack("<i", size_bytes)
        body = fh.read(doc_size - S_INT32)

        # A short read means the stream ended before the declared size.
        if S_INT32 + len(body) < doc_size:
            raise bson.InvalidBSON("objsize too large")
        # Every valid document terminates with a zero EOO byte.
        if not body or body[-1] != ZERO:
            raise bson.InvalidBSON("bad eoo")

        yield bson._elements_to_dict(body[:-1], as_class,
                                     tz_aware, uuid_subtype)
Пример #2
0
def load(fh, as_class=dict,
              tz_aware=True, uuid_subtype=bson.OLD_UUID_SUBTYPE):
    """Decode BSON data to multiple documents.

    `fh` must be a file-like object of concatenated, valid,
    BSON-encoded documents.

    :Parameters:
      - `fh`: a file-like object supporting ``.read()``
      - `as_class` (optional): the class to use for the resulting
        documents
      - `tz_aware` (optional): if ``True``, return timezone-aware
        :class:`~datetime.datetime` instances
      - `uuid_subtype` (optional): BSON binary subtype expected for
        UUID values

    :Raises: :class:`bson.InvalidBSON` if a document is truncated or
      lacks its terminating EOO byte.
    """
    while True:
        obj_size = fh.read(S_INT32)
        if len(obj_size) == 0:
            # Clean EOF between documents: iteration is complete.
            return

        obj_size = struct.unpack("<i", obj_size)[0]
        data = fh.read(obj_size - S_INT32)

        if len(data) + S_INT32 < obj_size:
            # The stream ended before the declared document size.
            raise bson.InvalidBSON("objsize too large")
        # Guard against an empty body before indexing: a valid BSON
        # document always ends with a zero EOO byte.
        if not data or data[-1] != 0:
            raise bson.InvalidBSON("bad eoo")

        elements = data[:-1]
        yield bson._elements_to_dict(elements, as_class, tz_aware, uuid_subtype)
Пример #3
0
 def __inflated(self):
     """Lazily decode the raw BSON payload, caching the result."""
     if self.__inflated_doc is None:
         # The object's size was validated when this document was
         # created, so skip revalidation; decode the elements between
         # the 4-byte length prefix and the trailing EOO byte.  SON is
         # used to preserve element ordering.
         raw = self.__raw
         self.__inflated_doc = _elements_to_dict(
             raw, 4, len(raw) - 1, self.__codec_options, SON())
     return self.__inflated_doc
 def __inflated(self):
     """Return the decoded document, inflating from raw bytes on first use."""
     if self.__inflated_doc is None:
         # Size was already checked at construction time, so no need
         # to revalidate here.  SON keeps the BSON element ordering.
         self.__inflated_doc = _elements_to_dict(
             self.__raw,
             4,
             len(self.__raw) - 1,
             self.__codec_options,
             SON())
     return self.__inflated_doc
Пример #5
0
 def _read(self):
     """Read and decode the next BSON document from ``self.fh``.

     Returns the decoded dict.  On EOF or a malformed length prefix,
     sets ``self.eof`` and raises ``StopIteration`` carrying the
     underlying ``struct.error``.
     """
     try:
         size_bits = self.fh.read(4)
         # The 4-byte length prefix counts itself, so the remaining
         # body is (declared size - 4) bytes, EOO byte included.
         size = struct.unpack("<i", size_bits)[0] - 4 # BSON size byte includes itself 
         data = self.fh.read(size)
         if len(data) != size:
             raise struct.error("Unable to cleanly read expected BSON Chunk; EOF, underful buffer or invalid object size.")
         if data[size - 1] != "\x00":
             raise InvalidBSON("Bad EOO in BSON Data")
         # Strip the trailing EOO byte before decoding the elements.
         chunk = data[:size - 1]
         doc = _elements_to_dict(chunk, dict, True)
         return doc
     except struct.error, e:
         # NOTE(review): Python 2-only except syntax; struct.error here
         # means EOF mid-prefix or a short/invalid length record.
         #print >> sys.stderr, "Parsing Length record failed: %s" % e
         self.eof = True
         raise StopIteration(e)
Пример #6
0
 def _read(self):
     """Decode one BSON document from ``self.fh`` into a dict.

     Raises ``StopIteration`` (wrapping the original ``struct.error``)
     on EOF or corrupt input, after flagging ``self.eof``.
     """
     try:
         size_bits = self.fh.read(4)
         # Declared size includes the 4-byte prefix itself; subtract it
         # to get the body length (trailing EOO byte included).
         size = struct.unpack("<i", size_bits)[0] - 4 # BSON size byte includes itself 
         data = self.fh.read(size)
         if len(data) != size:
             raise struct.error("Unable to cleanly read expected BSON Chunk; EOF, underful buffer or invalid object size.")
         if data[size - 1] != "\x00":
             raise InvalidBSON("Bad EOO in BSON Data")
         # Drop the EOO terminator; decode only the element payload.
         chunk = data[:size - 1]
         doc = _elements_to_dict(chunk, dict, True)
         return doc
     except struct.error, e:
         # NOTE(review): Python 2-only except syntax.  A struct.error
         # signals EOF or an unparsable length record.
         #print >> sys.stderr, "Parsing Length record failed: %s" % e
         self.eof = True
         raise StopIteration(e)
Пример #7
0
def _get_object(data, position, as_class, tz_aware,
                uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
    """Decode one embedded BSON document starting at `position` in `data`.

    Returns a ``(document, new_position)`` tuple, where `new_position`
    points just past the decoded document.  A document containing a
    ``$ref`` key is converted to a :class:`DBRef`; one carrying a
    ``__customtype__`` marker is handed to its registered unpickler.

    :Raises: ``ValueError`` if ``__customtype__`` names a type with no
      registered unpickler.
    """
    obj_size = struct.unpack("<i", data[position:position + 4])[0]
    # Skip the 4-byte length prefix and drop the trailing EOO byte.
    encoded = data[position + 4:position + obj_size - 1]
    # Named `obj` (not `object`) to avoid shadowing the builtin.
    obj = _elements_to_dict(encoded, as_class, tz_aware, uuid_subtype,
                            compile_re)
    position += obj_size
    if "$ref" in obj:
        return (DBRef(obj.pop("$ref"), obj.pop("$id"),
                      obj.pop("$db", None), obj), position)
    if '__customtype__' in obj:
        code = obj.pop('__customtype__')
        unpickler = Extension.getunpickler(code)
        if not unpickler:
            raise ValueError("Unknown custom type: %r" % code)
        return unpickler(code, obj), position

    return obj, position
Пример #8
0
def readBSONFile( fileName , callback ):
    """Iterate the BSON documents in `fileName`, invoking `callback`
    with each decoded dict.

    :Raises: ``Exception`` if the file ends in the middle of a
      document's 4-byte length prefix.
    """
    # Open in binary mode: BSON is binary data and must not pass
    # through newline translation.  The context manager closes the
    # handle even if `callback` raises (the original leaked it).
    with open( fileName , "rb" ) as inp:
        while True:
            x = inp.read(4)

            if len(x) == 0:
                break

            if len(x) < 4:
                raise Exception( "bad - need int for length and only got %d bytes " % len(x) )

            obj_size = struct.unpack( "<i" , x )[0]

            # obj_size counts the 4-byte prefix and the trailing EOO
            # byte, so the element payload is obj_size - 5 bytes.
            elements = inp.read( obj_size - 5 )
            callback( bson._elements_to_dict( elements , dict , True ) )

            # skip the EOO byte excluded by the -5 above
            inp.read(1)
Пример #9
0
def readBSONFile(fileName, callback):
    """Invoke `callback` with each decoded BSON document in `fileName`.

    :Raises: ``Exception`` if the file ends mid-way through a
      document's 4-byte length prefix.
    """
    # Binary mode is required for BSON data; the context manager
    # guarantees the handle is closed (the original never closed it).
    with open(fileName, "rb") as inp:
        while True:
            x = inp.read(4)

            if len(x) == 0:
                break

            if len(x) < 4:
                raise Exception(
                    "bad - need int for length and only got %d bytes " % len(x))

            obj_size = struct.unpack("<i", x)[0]

            # obj_size includes the 4-byte prefix and the EOO byte,
            # so the element payload is obj_size - 5 bytes.
            elements = inp.read(obj_size - 5)
            callback(bson._elements_to_dict(elements, dict, True))

            # this is because of the -5 above
            inp.read(1)
Пример #10
0
class BSONSource(pump.Source):
    """pump.Source implementation that reads documents from a bson file.

    The file is identified by a spec string prefixed with BSON_SCHEME;
    documents are streamed into pump batches as TAP mutations.
    """

    def __init__(self, opts, spec, source_bucket, source_node,
                 source_map, sink_map, ctl, cur):
        super(BSONSource, self).__init__(opts, spec, source_bucket, source_node,
                                         source_map, sink_map, ctl, cur)
        self.done = False
        # File handle is opened lazily on the first provide_batch() call.
        self.f = None

    @staticmethod
    def can_handle(opts, spec):
        """Return True when `spec` uses the BSON scheme and names an existing file."""
        return spec.startswith(BSON_SCHEME) and \
            os.path.isfile(spec.replace(BSON_SCHEME, ""))

    @staticmethod
    def check(opts, spec):
        """Describe the source as one pseudo-bucket named after the file."""
        return 0, {'spec': spec,
                   'buckets': [{'name': os.path.basename(spec),
                                'nodes': [{'hostname': 'N/A'}]}]}

    @staticmethod
    def provide_design(opts, source_spec, source_bucket, source_map):
        # BSON files carry no design documents.
        return 0, None

    def provide_batch(self):
        """Read documents from the bson file into a pump.Batch.

        Returns an ``(rv, batch)`` tuple; ``batch`` is None once the
        file is exhausted.  Stops filling the batch when the configured
        document-count or byte limits are reached.
        """
        if self.done:
            return 0, None

        if not self.f:
            try:
                self.f = open(self.spec.replace(BSON_SCHEME, ""))
            except IOError, e:
                # Report the failure as an error string per pump convention.
                return "error: could not open bson: %s; exception: %s" % \
                    (self.spec, e), None

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        cmd = couchbaseConstants.CMD_TAP_MUTATION
        vbucket_id = 0x0000ffff
        cas, exp, flg = 0, 0, 0

        while (self.f and
               batch.size() < batch_max_size and
               batch.bytes < batch_max_bytes):
            # Each BSON document begins with a 4-byte little-endian
            # length that includes the prefix itself.
            doc_size_buf = self.f.read(4)
            if not doc_size_buf:
                # Clean EOF: no more documents.
                self.done = True
                self.f.close()
                self.f = None
                break
            doc_size, = struct.unpack("<i", doc_size_buf)
            doc_buf = self.f.read(doc_size - 4)
            if not doc_buf:
                # Truncated document: treat as end of input.
                self.done = True
                self.f.close()
                self.f = None
                break
            doc = bson._elements_to_dict(doc_buf, dict, True)
            key = doc['_id']
            doc_json = json.dumps(doc)
            msg = (cmd, vbucket_id, key, flg, exp, cas, '', doc_json, 0, 0, 0)
            batch.append(msg, len(doc))

        if batch.size() <= 0:
            return 0, None
        return 0, batch