Example #1
    def test_exception_wrapping(self):
        # No matter what exception is raised while trying to decode BSON,
        # the final exception always matches InvalidBSON and the original
        # traceback is preserved.

        # Invalid Python regex, though valid PCRE.
        # Causes an error in re.compile().
        bad_doc = BSON.encode({'r': Regex(r'[\w-\.]')})

        try:
            decode_all(bad_doc)
        except InvalidBSON:
            exc_type, exc_value, exc_tb = sys.exc_info()
            # Original re error was captured and wrapped in InvalidBSON.
            self.assertEqual(exc_value.args[0], 'bad character range')

            # Traceback includes bson module's call into re module.
            for filename, lineno, fname, text in traceback.extract_tb(exc_tb):
                if filename.endswith('re.py') and fname == 'compile':
                    # Traceback was correctly preserved.
                    break
            else:
                self.fail('Traceback not captured')
        else:
            self.fail('InvalidBSON not raised')
Example #2
 def rows_func(rows):
     try:
         bson_data = bson.decode_all(rows)[0]
         rows_data = bson_data['array']
         #key_indices = bson_data['keyindices']
         acc_wrapper._set_data(list(init_acc_values))
         for row in rows_data:
             row_wrapper.load_row(row)
             aggregator_function(acc_wrapper, row_wrapper)
         result = []
         for key_index in key_indices_wrapper:
             answer = rows_data[0][key_index]
             result.append(answer)
         result.extend(acc_wrapper._get_data())
         return numpy_to_bson_friendly(result)
     except Exception as e:
         try:
             e_msg = unicode(e)
         except:
             e_msg = u'<unable to get exception message>'
         try:
             e_row = unicode(bson.decode_all(rows)[0]['array'])
         except:
             e_row = u'<unable to get row data>'
         try:
             msg = base64.urlsafe_b64encode((u'Exception: %s running UDF on row: %s' % (e_msg, e_row)).encode('utf-8'))
         except:
             msg = base64.urlsafe_b64encode(u'Exception running UDF, unable to provide details.'.encode('utf-8'))
         raise IaPyWorkerError(msg)
Example #3
def main():
    print 'Are you sure you want to run this?'
    return
    db.command('dropDatabase')
    print 'Creating indices'
    print ' leden'
    Es.ensure_indices()
    print ' moderation'
    Es_mod.ensure_indices()
    print ' planning'
    Es_plan.ensure_indices()
    print ' poll'
    Es_poll.ensure_indices()
    print ' regl'
    Es_regl.ensure_indices()
    print ' subscriptions'
    Es_subscr.ensure_indices()
    print
    print 'Restoring data'
    print ' entities'
    for e in bson.decode_all(open('entities.bsons').read()):
        db['entities'].save(e)
    print ' relations'
    for e in bson.decode_all(open('relations.bsons').read()):
        db['relations'].save(e)
    print ' events'
    for e in bson.decode_all(open('events.bsons').read()):
        db['events'].save(e)
Example #4
    def translate_response(self, s=None, force_reply=False):
        """
        translate_response(s=None, force_reply=False)
        : translate incoming loco packet

        s : defaults to None; the socket to use, or None for the default socket
        force_reply : defaults to False; if True, return only the result of the
                      command sent by pykakao (other packets go to handle_packet)
        """

        if not s:
            if not self.s:
                print "error translate_response: connection required"
                return None
            else:
                s = self.s

        result = {}
        head = s.recv(4)
        if not head:
            print "error translate_response: connection closed"

            s.close()
            s = None

            return None
        elif head == "\xFF\xFF\xFF\xFF":
            body = s.recv(18)

            result["packet_id"] = head
            result["status_code"] = body[0:2]
            result["command"] = body[2:13].replace("\x00", "")
            result["body_type"] = body[13:14]
            result["body_length"] = struct.unpack("I", body[14:18])[0]
            result["body"] = decode_all(s.recv(result["body_length"]))[0]

            return result
        else:
            body_length = struct.unpack("I", head)[0]
            body = self.dec_aes(s.recv(body_length))

            result["packet_id"] = body[0:4]
            result["status_code"] = body[4:6]
            result["command"] = body[6:17].replace("\x00", "")
            result["body_type"] = body[17:18]
            result["body_length"] = struct.unpack("I", body[18:22])[0]
            result["body"] = decode_all(body[22:])[0]

            if result["packet_id"] != "\xFF\xFF\xFF\xFF" and force_reply:
                self.handle_packet(result)

                return self.translate_response(s, force_reply)
            else:
                return result
Example #5
    def test_exception_wrapping(self):
        # No matter what exception is raised while trying to decode BSON,
        # the final exception always matches InvalidBSON.

        # {'s': '\xff'}, will throw attempting to decode utf-8.
        bad_doc = b'\x0f\x00\x00\x00\x02s\x00\x03\x00\x00\x00\xff\x00\x00\x00'

        with self.assertRaises(InvalidBSON) as context:
            decode_all(bad_doc)

        self.assertIn("codec can't decode byte 0xff",
                      str(context.exception))
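Example #6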
    def test_legacy_csharp_uuid(self):
        data = self.csharp_data

        # Test decoding
        docs = bson.decode_all(data, CodecOptions(SON, False, PYTHON_LEGACY))
        for d in docs:
            self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        docs = bson.decode_all(data, CodecOptions(SON, False, STANDARD))
        for d in docs:
            self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY))
        for d in docs:
            self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY))
        for d in docs:
            self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        # Test encoding
        encoded = b''.join([
            bson.BSON.encode(doc,
                             False,
                             CodecOptions(uuid_representation=PYTHON_LEGACY))
            for doc in docs])
        self.assertNotEqual(data, encoded)

        encoded = b''.join([
            bson.BSON.encode(doc,
                             False,
                             CodecOptions(uuid_representation=STANDARD))
            for doc in docs])
        self.assertNotEqual(data, encoded)

        encoded = b''.join(
            [bson.BSON.encode(doc,
                              False,
                              CodecOptions(uuid_representation=JAVA_LEGACY))
             for doc in docs])
        self.assertNotEqual(data, encoded)

        encoded = b''.join(
            [bson.BSON.encode(doc,
                              False,
                              CodecOptions(uuid_representation=CSHARP_LEGACY))
             for doc in docs])
        self.assertEqual(data, encoded)
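The positional CodecOptions arguments above (document_class, tz_aware, uuid_representation) are the pymongo 3.x spelling. A minimal sketch of the same legacy-UUID round trip using the named UuidRepresentation constants (assuming pymongo 3.11+, and a synthetic document in place of the driver-generated csharp_data):

import uuid

import bson
from bson.binary import UuidRepresentation
from bson.codec_options import CodecOptions

guid = uuid.uuid4()
doc = {'newguid': guid, 'newguidstring': str(guid)}

# Encode with the C# driver's legacy byte order...
csharp = CodecOptions(uuid_representation=UuidRepresentation.CSHARP_LEGACY)
data = bson.encode(doc, codec_options=csharp)

# ...decoding with the same representation round-trips the UUID,
decoded = bson.decode_all(data, csharp)[0]
assert decoded['newguid'] == uuid.UUID(decoded['newguidstring'])

# while JAVA_LEGACY reassembles the bytes in the wrong order.
java = CodecOptions(uuid_representation=UuidRepresentation.JAVA_LEGACY)
assert bson.decode_all(data, java)[0]['newguid'] != guid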
Example #7
def _mongodb_decode_wire_protocol(message):
    """ http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol """
    MONGO_OPS = {
        1000: 'msg',
        2001: 'update',
        2002: 'insert',
        2003: 'reserved',
        2004: 'query',
        2005: 'get_more',
        2006: 'delete',
        2007: 'kill_cursors',
    }
    _, msg_id, _, opcode, _ = struct.unpack('<iiiii', message[:20])
    op = MONGO_OPS.get(opcode, 'unknown')
    zidx = 20
    collection_name_size = message[zidx:].find('\0')
    collection_name = message[zidx:zidx+collection_name_size]
    zidx += collection_name_size + 1
    skip, limit = struct.unpack('<ii', message[zidx:zidx+8])
    zidx += 8
    msg = ""
    try:
        if message[zidx:]:
            msg = bson.decode_all(message[zidx:], as_class=dict, tz_aware=False)
    except Exception, e:
        msg = 'invalid bson'
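Example #8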
    def test_backports(self):
        doc = BSON.encode({"tuple": (1, 2)})
        exp = {"tuple": [1, 2]}
        options = CodecOptions(uuid_representation=ALL_UUID_REPRESENTATIONS[0],
                               tz_aware=False, document_class=dict)

        self.assertEqual(
            {"tuple": [1, 2]},
            BSON.encode(
                {"tuple": (1, 2)}, codec_options=options,
                uuid_subtype=ALL_UUID_REPRESENTATIONS[1]).decode())
        self.assertEqual(exp, doc.decode(
            as_class=SON,
            tz_aware=True,
            uuid_subtype=ALL_UUID_REPRESENTATIONS[1],
            codec_options=options))
        self.assertEqual([exp], list(decode_iter(
            doc,
            as_class=SON,
            tz_aware=True,
            uuid_subtype=ALL_UUID_REPRESENTATIONS[1],
            codec_options=options)))
        self.assertEqual([exp], list(decode_file_iter(
            StringIO(doc),
            as_class=SON,
            tz_aware=True,
            uuid_subtype=ALL_UUID_REPRESENTATIONS[1],
            codec_options=options)))
        self.assertEqual([exp], decode_all(
            doc, SON, True, ALL_UUID_REPRESENTATIONS[1], True, options))
Example #9
def _mongodb_decode_wire_protocol(message):
    """ http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol """
    MONGO_OPS = {
        1000: "msg",
        2001: "update",
        2002: "insert",
        2003: "reserved",
        2004: "query",
        2005: "get_more",
        2006: "delete",
        2007: "kill_cursors",
    }
    _, msg_id, _, opcode, _ = struct.unpack("<iiiii", message[:20])
    op = MONGO_OPS.get(opcode, "unknown")
    zidx = 20
    collection_name_size = message[zidx:].find("\0")
    collection_name = message[zidx : zidx + collection_name_size]
    if ".system." in collection_name:
        return
    zidx += collection_name_size + 1
    skip, limit = struct.unpack("<ii", message[zidx : zidx + 8])
    zidx += 8
    msg = ""
    try:
        if message[zidx:]:
            msg = bson.decode_all(message[zidx:])
    except:
        msg = "invalid bson"
    return {"op": op, "collection": collection_name, "msg_id": msg_id, "skip": skip, "limit": limit, "query": msg}
Example #10
def _decode_docs(message, deep_decode):
    try:
        if deep_decode:
            return bson.decode_all(message)
        return [dict(not_decoded=True)]
    except InvalidBSON, e:
        return [dict(decode_error='invalid bson:  %s' % e)]
Example #11
    def _checkForUpdate( self ):
        """ Update the agent if possible """

        res = urllib2.urlopen( self.settings.version_url % { 'key' : self.settings.mms_key } )

        resBson = None
        try:
            resBson = bson.decode_all( res.read() )
        finally:
            if res is not None:
                res.close()
                res = None

        if len(resBson) != 1:
            return

        versionResponse = resBson[0]

        if 'status' not in versionResponse or versionResponse['status'] != 'ok':
            return

        if 'agentVersion' not in versionResponse or 'authCode' not in versionResponse:
            return

        remoteAgentVersion = versionResponse['agentVersion']
        authCode =  versionResponse['authCode']

        if authCode != hmac.new( self.settings.secret_key, remoteAgentVersion, digestmod=hashlib.sha1 ).hexdigest():
            self.logger.error( 'Invalid auth code - please confirm your secret key (defined on Settings page) is correct and hmac is properly installed - http://mms.10gen.com/help/' )
            return

        if self._shouldUpgradeAgent( self.settings.settingsAgentVersion, remoteAgentVersion ):
            self._upgradeAgent( remoteAgentVersion )
Example #12
def _unpack_response(response, cursor_id=None, as_class=dict, tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id not
        valid at server response
      - `as_class` (optional): class to use for resulting documents
    """
    response_flag = struct.unpack("<i", response[:4])[0]
    if response_flag & 1:
        # Shouldn't get this response if we aren't doing a getMore
        assert cursor_id is not None

        raise OperationFailure("cursor id '%s' not valid at server" % cursor_id)
    elif response_flag & 2:
        error_object = bson.BSON(response[20:]).decode()
        if error_object["$err"].startswith("not master"):
            raise AutoReconnect(error_object["$err"])
        elif error_object.get("code") == 50:
            raise ExecutionTimeout(error_object["$err"], error_object["code"])
        raise OperationFailure("database error: %s" % error_object["$err"])

    result = {}
    result["cursor_id"] = struct.unpack("<q", response[4:12])[0]
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:], as_class, tz_aware, uuid_subtype)
    assert len(result["data"]) == result["number_returned"]
    return result
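The OP_REPLY body this function consumes is four fixed fields followed by concatenated BSON documents. A hedged sketch of that layout, built by hand (bson.encode assumes pymongo 3.9+):

import struct

import bson

docs = [{'x': 1}, {'x': 2}]
# responseFlags, cursorID (int64), startingFrom, numberReturned, then the documents.
reply = struct.pack('<iqii', 0, 0, 0, len(docs)) + b''.join(bson.encode(d) for d in docs)

assert struct.unpack('<i', reply[:4])[0] & 3 == 0   # no error flags set
assert bson.decode_all(reply[20:]) == docs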
Example #13
def parse_update(data, length):
    #struct OP_UPDATE {
    #    MsgHeader header;             // standard message header
    #    int32     ZERO;               // 0 - reserved for future use
    #    cstring   fullCollectionName; // "dbname.collectionname"
    #    int32     flags;              // bit vector. see below
    #    document  selector;           // the query to select the document
    #    document  update;             // specification of the update to perform
    #}
    val, pos = bson._get_int(data, 16)
    collection, pos = bson._get_c_string(data, pos)
    flags, pos = bson._get_int(data, pos)
    selector, update = "", ""
    try:
        o = bson.decode_all(data[pos:length])
        selector, update = o
    except Exception as e:
        logger.exception("exception on bson decode in parse_update")

    # flags.
    # 0	Upsert	If set, the database will insert the supplied object into the collection if no matching document is found.
    # 1	MultiUpdate	If set, the database will update all matching objects in the collection. Otherwise only updates first matching doc.
    # 2-31	Reserved	Must be set to 0.
    upsert = check_bit(flags, 0)
    multi_update = check_bit(flags, 1)

    return Operation(operation=OP_UPDATE,
                     upsert=upsert,
                     multi_update=multi_update,
                     collection=collection,
                     selector=selector,
                     update=update)
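The selector/update unpacking works because decode_all splits a run of concatenated documents into a list. A minimal sketch (bson.encode assumes pymongo 3.9+):

import bson

selector = {'_id': 1}
update = {'$set': {'name': 'x'}}
payload = bson.encode(selector) + bson.encode(update)

# Two back-to-back documents decode to a two-element list.
sel, upd = bson.decode_all(payload)
assert (sel, upd) == (selector, update)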
Example #14
def parse_insert(data, length):
    #struct {
    #    MsgHeader header;             // standard message header
    #    int32     flags;              // bit vector - see below
    #    cstring   fullCollectionName; // "dbname.collectionname"
    #    document* documents;          // one or more documents to insert into the collection
    #}
    # flags
    # 0	ContinueOnError	If set, the database will not stop processing a bulk insert if one fails
    # (eg due to duplicate IDs). This makes bulk insert behave similarly to a series of single inserts,
    # except lastError will be set if any insert fails, not just the last one. If multiple errors occur,
    # only the most recent will be reported by getLastError. (new in 1.9.1)
    # 1-31	Reserved	Must be set to 0.
    flags, pos = bson._get_int(data, 16)
    continue_on_error = check_bit(flags, 0)
    collection, pos = bson._get_c_string(data, pos)
    try:
        o = bson.decode_all(data[pos:])
    except bson.InvalidBSON as e:
        o = []
        logger.exception("exception on bson decode")

    return Operation(operation=OP_INSERT,
                     collection=collection,
                     continue_on_error=continue_on_error,
                     documents=o)
Example #15
def parse_delete(data, length):
    # struct {
    #     MsgHeader header;             // standard message header
    #     int32     ZERO;               // 0 - reserved for future use
    #     cstring   fullCollectionName; // "dbname.collectionname"
    #     int32     flags;              // bit vector - see below for details.
    #     document  selector;           // query object.  See below for details.
    # }
    # flags:
    # 0	SingleRemove	If set, the database will remove only the first matching document in the collection. Otherwise all matching documents will be removed.
    # 1-31	Reserved	Must be set to 0.
    zero, pos = bson._get_int(data, 16)
    collection, pos = bson._get_c_string(data, pos)
    flags, pos = bson._get_int(data, pos)
    single_remove = check_bit(flags, 0)
    try:
        o = bson.decode_all(data[pos:length])
        selector = o[0]
    except Exception as e:
        selector = ""
        logger.exception("exception on bson decode")
    return Operation(operation=OP_DELETE,
                     collection=collection,
                     single_remove=single_remove,
                     selector=selector)
Example #16
def _unpack_response(response, cursor_id=None, as_class=dict, tz_aware=False):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id not
        valid at server response
      - `as_class` (optional): class to use for resulting documents
    """
    response_flag = struct.unpack("<i", response[:4])[0]
    if response_flag & 1:
        raise InterfaceError("cursor not valid at server")
    elif response_flag & 2:
        error_object = bson.BSON(response[20:]).decode()
        if error_object["$err"] == "not master":
            raise DatabaseError("master has changed")
        raise DatabaseError("database error: %s" %
                               error_object["$err"])

    result = {}
    result["cursor_id"] = struct.unpack("<q", response[4:12])[0]
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:], as_class, tz_aware)
    assert len(result["data"]) == result["number_returned"]
    return result
Example #17
    def test_polarizationtodb(self):

        import bson
        import gzip

        reference_dir = os.path.abspath(os.path.join(ref_dir, "ferroelectric_wf"))

        with gzip.open(os.path.join(reference_dir, "tasks.bson.gz")) as f:
            coll_raw = f.read()

        coll = bson.decode_all(coll_raw)

        db = self.get_task_collection()
        for c in coll:
            db.insert(c)

        new_fw_spec = {'_fw_env': {"db_file": os.path.join(db_dir, "db.json")},
                       'tags':['wfid_1494203093.06934658']}

        analysis = PolarizationToDb(db_file='>>db_file<<', name="_polarization_post_processing")
        analysis.run_task(new_fw_spec)

        # Check recovered change in polarization
        coll = self.get_task_collection("polarization_tasks")
        d = coll.find_one()
        self.assertAlmostEqual(d['polarization_change_norm'], 46.288752795325244, 5)
Example #18
    def test_read_write_bson(self):
        self.coll.insert_many([{'_id': bson.objectid.ObjectId()}
                               for i in range(1000)])
        bson_location = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'spark_output')
        self.sc.mongoRDD(CONNECTION_STRING).saveToBSON(bson_location)
        try:
            # 'part-r-00000.bson' is a file name generated by Spark.
            bson_file = os.path.join(bson_location, 'part-r-00000.bson')
            with open(bson_file, 'rb') as fd:
                documents = bson.decode_all(fd.read())
                self.assertEqual(1000, len(documents))

            # Try loading the BSON file into Spark as a separate RDD.
            bson_rdd = self.sc.BSONFileRDD(bson_file)
            self.assertEqual(1000, bson_rdd.count())

            # Also try the pair version.
            bson_pair_rdd = self.sc.BSONFilePairRDD(bson_file)
            self.assertEqual(1000, bson_pair_rdd.count())
            first_element = bson_pair_rdd.first()
            self.assertIsInstance(first_element, tuple)
            self.assertEqual(2, len(first_element))
        finally:
            try:
                shutil.rmtree(bson_location)
            except Exception:
                pass
Example #19
	def test_set(self):
		"success type (+OK)"
		self.query('DEL/hello')
		f = self.query('SET/hello/world.bson')
		self.assertTrue(f.headers.getheader('Content-Type') == 'application/bson')
		obj = bson.decode_all(f.read())
		self.assertTrue(obj == [{u'SET': [True, bson.Binary('OK', 0)]}])
Example #20
def populate_main_sql_testdatabase(engine):
    meta = MetaData()

    table = Table('events', meta,
                  Column('id', Integer, primary_key=True, ),
                  Column('time', String(30)),
                  Column('source_ip', String(30)),
                  Column('source_port', String(30)),
                  Column('request_url', String(500)),
                  Column('request_raw', String(65536)),
                  Column('pattern', String(20)),
                  Column('filename', String(500)),
    )

    meta.create_all(engine)

    insert_dicts = []
    data = open(os.path.join(file_dir, 'data/events_500.bson'), 'r').read()
    for item in bson.decode_all(data):
        new_item = {"source_ip": item["source_ip"],
                    "source_port": item["source_port"],
                    "request_url": item["request"]["url"],
                    "pattern": item["pattern"]}

        insert_dicts.append(new_item)

    conn = engine.connect()
    print "Inserted: {0}".format(len(insert_dicts))
    conn.execute(table.insert(), insert_dicts)
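Example #21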
def _mongodb_decode_wire_protocol(message):
    """ http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol """
    MONGO_OPS = {
        1000: 'msg',
        2001: 'update',
        2002: 'insert',
        2003: 'reserved',
        2004: 'query',
        2005: 'get_more',
        2006: 'delete',
        2007: 'kill_cursors',
    }
    _, msg_id, _, opcode, _ = struct.unpack('<iiiii', message[:20])
    op = MONGO_OPS.get(opcode, 'unknown')
    zidx = 20
    collection_name_size = message[zidx:].find('\0')
    collection_name = message[zidx:zidx+collection_name_size]
    if '.system.' in collection_name:
        return
    zidx += collection_name_size + 1
    skip, limit = struct.unpack('<ii', message[zidx:zidx+8])
    zidx += 8
    msg = ""
    try:
        if message[zidx:]:
            msg = bson.decode_all(message[zidx:])
    except:
        msg = 'invalid bson'
    return { 'op': op, 'collection': collection_name,
             'msg_id': msg_id, 'skip': skip, 'limit': limit,
             'query': msg }
Example #22
def readTmpFile( processPid ):
    """ Read the temp file """
    fileName = os.path.join( tempfile.gettempdir(), 'mms-' + str( processPid ) )

    if not os.path.isfile( fileName ):
        return None

    f = open( fileName )

    try:
        fileContent = f.read()

        # Handle the legacy json files
        if fileContent.startswith( '{' ):
            os.remove( fileName )
            return None

        resBson = bson.decode_all( fileContent )

        if len(resBson) != 1:
            return None

        return resBson[0]

    finally:
        f.close()
Example #23
 def test_set(self):
     "success type (+OK)"
     self.query("DEL/hello")
     f = self.query("SET/hello/world.bson")
     self.assertTrue(f.headers.getheader("Content-Type") == "application/bson")
     obj = bson.decode_all(f.read())
     self.assertTrue(obj == [{u"SET": [True, bson.Binary("OK", 0)]}])
Example #24
    def test_treestore(self):
        output = romanesco.convert(
            "tree",
            {"format": "newick", "data": self.newick},
            {"format": "r.apetree"})
        output = romanesco.convert("tree", output, {"format": "treestore"})
        self.assertEqual(output["format"], "treestore")
        rows = bson.decode_all(output["data"])
        root = None
        for d in rows:
            if "rooted" in d:
                root = d
        self.assertNotEqual(root, None)
        self.assertEqual(len(root["clades"]), 1)

        def findId(id):
            for d in rows:
                if d["_id"] == id:
                    return d

        top = findId(root["clades"][0])
        self.assertEqual(len(top["clades"]), 2)
        internal = findId(top["clades"][0])
        rubribarbus = findId(top["clades"][1])
        ahli = findId(internal["clades"][0])
        allogus = findId(internal["clades"][1])
        self.assertEqual(internal["branch_length"], 2)
        self.assertEqual(ahli["name"], "ahli")
        self.assertEqual(ahli["branch_length"], 0)
        self.assertEqual(allogus["name"], "allogus")
        self.assertEqual(allogus["branch_length"], 1)
        self.assertEqual(rubribarbus["name"], "rubribarbus")
        self.assertEqual(rubribarbus["branch_length"], 3)
Example #25
    def test_treestore(self):
        output = convert(
            'tree',
            {'format': 'newick', 'data': self.newick},
            {'format': 'r.apetree'})
        output = convert('tree', output, {'format': 'treestore'})
        self.assertEqual(output['format'], 'treestore')
        rows = bson.decode_all(output['data'])
        root = None
        for d in rows:
            if 'rooted' in d:
                root = d
        self.assertNotEqual(root, None)
        self.assertEqual(len(root['clades']), 1)

        def findId(id):
            for d in rows:
                if d['_id'] == id:
                    return d

        top = findId(root['clades'][0])
        self.assertEqual(len(top['clades']), 2)
        internal = findId(top['clades'][0])
        rubribarbus = findId(top['clades'][1])
        ahli = findId(internal['clades'][0])
        allogus = findId(internal['clades'][1])
        self.assertEqual(internal['branch_length'], 2)
        self.assertEqual(ahli['name'], 'ahli')
        self.assertEqual(ahli['branch_length'], 0)
        self.assertEqual(allogus['name'], 'allogus')
        self.assertEqual(allogus['branch_length'], 1)
        self.assertEqual(rubribarbus['name'], 'rubribarbus')
        self.assertEqual(rubribarbus['branch_length'], 3)
Example #26
def Parse_Messaged(path="rocketchat_message.bson", rID_name=None):
    """Parse the message file from rocketchat db: rocketchat_message.bson and resturn a table of the results."""
    # parsing messages
    if rID_name is None:
        room, rID_name = Parse_Rooms()

    bson_file = open(path, 'rb')
    messages = bson.decode_all(bson_file.read())

    res = []

    for i in messages:
        if 'u' in i:
            if 'msg' in i:
                if 'rid' in i:
                    tmp = []
                    tmp.append(i['rid'])
                    if i['rid'] in rID_name:
                        tmp.append(rID_name[i['rid']])
                    else:
                        tmp.append('None')
                    tmp.append(i['u']['username'])
                    tmp.append(i['ts'].isoformat())
                    tmp.append(i['msg'].replace('\t', ' '))
                    res.append(tmp)
    return res
Example #27
def Parse_Rooms(path="rocketchat_room.bson"):
    """Parse the room file from rocket chat: rocketchat_room.bson and returns a table of the results plus a mapping of room ID and room names."""
    bson_file = open(path, 'rb')
    rooms = bson.decode_all(bson_file.read())
    # Parsing Rooms

    room = []
    rID_name = {}
    for i in rooms:
        Type = i['t']
        creation_date = i['ts'].isoformat()
        nb_msg = i['msgs']
        rID = i['_id']
        if Type != 'd':
            name = i['name']
            if name == 'general':
                creator = None
            else:
                creator = i['u']['username']
            rID_name[rID] = name
        else:
            name = None
            creator = None
        users = i['usernames']
        tmp = [str(i) for i in [rID, name, creator, creation_date, Type, nb_msg, ','.join(users)]]
        room.append(tmp)
    return room, rID_name
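A usage sketch, given the two helpers above and assuming both mongodump files sit in the working directory:

# Room metadata first, so each message can be labeled with its room name.
room, rID_name = Parse_Rooms('rocketchat_room.bson')
messages = Parse_Messaged('rocketchat_message.bson', rID_name=rID_name)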
Example #28
    def test_raw_batches(self):
        c = self.collection
        yield c.delete_many({})
        yield c.insert_many({'_id': i} for i in range(4))

        find = partial(c.find_raw_batches, {})
        agg = partial(c.aggregate_raw_batches, [{'$sort': {'_id': 1}}])

        for method in find, agg:
            cursor = method().batch_size(2)
            yield cursor.fetch_next
            batch = cursor.next_object()
            self.assertEqual([{'_id': 0}, {'_id': 1}], bson.decode_all(batch))

            lst = yield method().batch_size(2).to_list(length=1)
            self.assertEqual([{'_id': 0}, {'_id': 1}], bson.decode_all(lst[0]))
Example #29
    async def test_iter_aggregate(self):
        collection = self.collection
        await collection.delete_many({})
        pipeline = [{'$sort': {'_id': 1}}]

        # Empty iterator.
        async for _ in collection.aggregate(pipeline):
            self.fail()

        for n_docs in 1, 2, 10:
            if n_docs:
                docs = [{'_id': i} for i in range(n_docs)]
                await collection.insert_many(docs)

            # Force extra batches to test iteration.
            j = 0
            cursor = collection.aggregate(pipeline).batch_size(3)
            async for doc in cursor:
                self.assertEqual(j, doc['_id'])
                j += 1

            self.assertEqual(j, n_docs)

            j = 0
            raw = collection.aggregate_raw_batches(pipeline).batch_size(3)
            async for batch in raw:
                j += len(bson.decode_all(batch))

            self.assertEqual(j, n_docs)
            await collection.delete_many({})
Example #30
def parse_query(data, length):
    # struct OP_QUERY {
    #     MsgHeader header;                 // standard message header
    #     int32     flags;                  // bit vector of query options.  See below for details.
    #     cstring   fullCollectionName ;    // "dbname.collectionname"
    #     int32     numberToSkip;           // number of documents to skip
    #     int32     numberToReturn;         // number of documents to return
    #                                       //  in the first OP_REPLY batch
    #     document  query;                  // query object.  See below for details.
    #   [ document  returnFieldsSelector; ] // Optional. Selector indicating the fields
    #                                       //  to return.  See below for details.
    # }
    # flags:
    # 0   Reserved    Must be set to 0.
    # 1   TailableCursor  Tailable means cursor is not closed when the last data is retrieved.
    #      Rather, the cursor marks the final object's position. You can resume using the cursor later, from where it was located,
    #     if more data were received. Like any "latent cursor",
    #      the cursor may become invalid at some point (CursorNotFound) – for example if the final object it references were deleted.
    # 2   SlaveOk Allow query of replica slave. Normally these return an error except for namespace "local".
    # 3   OplogReplay Internal replication use only - driver should not set
    # 4   NoCursorTimeout The server normally times out idle cursors after an inactivity period (10 minutes) to prevent excess memory use. Set this option to prevent that.
    # 5   AwaitData   Use with TailableCursor. If we are at the end of the data, block for a while rather than returning no data. After a timeout period, we do return as normal.
    # 6   Exhaust Stream the data down full blast in multiple "more" packages, on the assumption that the client will fully read all data queried. Faster when you are pulling a lot of data and know you want to pull it all down. Note: the client is not allowed to not read all the data unless it closes the connection.
    # 7   Partial Get partial results from a mongos if some shards are down (instead of throwing an error)
    # 8-31    Reserved    Must be set to 0.
    flags, pos = bson._get_int(data, 16)
    tailable_cursor = check_bit(flags, 1)
    slave_ok = check_bit(flags, 2)
    oplog_replay = check_bit(flags, 3)
    no_cursor_timeout = check_bit(flags, 4)
    await_data = check_bit(flags, 5)
    exhaust = check_bit(flags, 6)
    partial = check_bit(flags, 7)

    collection, pos = bson._get_c_string(data, pos)
    number_to_skip, pos = bson._get_int(data, pos)
    number_to_return, pos = bson._get_int(data, pos)
    fields_to_return = None
    query = ""
    try:
        o = bson.decode_all(data[pos:length])
        query = o[0]
    except bson.InvalidBSON as e:
        o = []
        logger.exception("exception on bson decode")

    if len(o) == 2:
        fields_to_return = o[1]
    return Operation(operation=OP_QUERY,
                     fields_to_return=fields_to_return,
                     tailable_cursor=tailable_cursor,
                     slave_ok=slave_ok,
                     oplog_replay=oplog_replay,
                     no_cursor_timeout=no_cursor_timeout,
                     number_to_skip=number_to_skip,
                     number_to_return=number_to_return,
                     await_data=await_data,
                     exhaust=exhaust,
                     partial=partial,
                     query=query)
Example #31
def _unpack_response(response, cursor_id=None, codec_options=CodecOptions()):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    Can raise CursorNotFound, NotMasterError, ExecutionTimeout, or
    OperationFailure.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id not
        valid at server response
      - `codec_options` (optional): an instance of
        :class:`~bson.codec_options.CodecOptions`
    """
    response_flag = struct.unpack("<i", response[:4])[0]
    if response_flag & 1:
        # Shouldn't get this response if we aren't doing a getMore
        assert cursor_id is not None

        raise CursorNotFound("cursor id '%s' not valid at server" % cursor_id)
    elif response_flag & 2:
        error_object = bson.BSON(response[20:]).decode()
        if error_object["$err"].startswith("not master"):
            raise NotMasterError(error_object["$err"])
        elif error_object.get("code") == 50:
            raise ExecutionTimeout(error_object.get("$err"),
                                   error_object.get("code"), error_object)
        raise OperationFailure("database error: %s" % error_object.get("$err"),
                               error_object.get("code"), error_object)

    result = {}
    result["cursor_id"] = struct.unpack("<q", response[4:12])[0]
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:], codec_options)
    assert len(result["data"]) == result["number_returned"]
    return result
Example #32
def _unpack_response(response,
                     cursor_id=None,
                     codec_options=_UNICODE_REPLACE_CODEC_OPTIONS):
    """Unpack a response from the database and decode the BSON document(s).

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    Can raise CursorNotFound, NotMasterError, ExecutionTimeout, or
    OperationFailure.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id not
        valid at server response
      - `codec_options` (optional): an instance of
        :class:`~bson.codec_options.CodecOptions`
    """
    result = _raw_response(response, cursor_id)
    result["data"] = bson.decode_all(result["data"][0], codec_options)
    return result
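Example #33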
def _unpack_response(response,
                     cursor_id=None,
                     as_class=dict,
                     tz_aware=False,
                     uuid_subtype=OLD_UUID_SUBTYPE):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id not
        valid at server response
      - `as_class` (optional): class to use for resulting documents
    """
    response_flag = struct.unpack("<i", response[:4])[0]
    if response_flag & 1:
        # Shouldn't get this response if we aren't doing a getMore
        assert cursor_id is not None

        raise OperationFailure("cursor id '%s' not valid at server" %
                               cursor_id)
    elif response_flag & 2:
        error_object = bson.BSON(response[20:]).decode()
        if error_object["$err"].startswith("not master"):
            raise AutoReconnect(error_object["$err"])
        raise OperationFailure("database error: %s" % error_object["$err"],
                               error_object)

    result = {}
    result["cursor_id"] = struct.unpack("<q", response[4:12])[0]
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:], as_class, tz_aware,
                                     uuid_subtype)
    assert len(result["data"]) == result["number_returned"]
    return result
Example #34
    def _checkForUpdate(self):
        """ Update the agent if possible """

        res = urllib.request.urlopen(self.settings.version_url %
                                     {'key': self.settings.mms_key})

        resBson = None
        try:
            resBson = bson.decode_all(res.read())
        finally:
            if res is not None:
                res.close()
                res = None

        if len(resBson) != 1:
            return

        versionResponse = resBson[0]

        if 'status' not in versionResponse or versionResponse['status'] != 'ok':
            return

        if 'agentVersion' not in versionResponse or 'authCode' not in versionResponse:
            return

        remoteAgentVersion = versionResponse['agentVersion']
        authCode = versionResponse['authCode']

        if authCode != hmac.new(self.settings.secret_key,
                                remoteAgentVersion,
                                digestmod=hashlib.sha1).hexdigest():
            self.logger.error(
                'Invalid auth code - please confirm your secret key (defined on Settings page) is correct and hmac is properly installed - http://mms.10gen.com/help/'
            )
            return

        if self._shouldUpgradeAgent(self.settings.settingsAgentVersion,
                                    remoteAgentVersion):
            self._upgradeAgent(remoteAgentVersion)
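Unlike the Python 2 version in Example #11, Python 3's hmac.new rejects str keys and messages. A minimal sketch of the auth-code computation with hypothetical key and version values:

import hashlib
import hmac

secret_key = b'example-secret'      # hypothetical; bytes are required in Python 3
remote_agent_version = b'1.4.2'     # hypothetical

auth_code = hmac.new(secret_key, remote_agent_version,
                     digestmod=hashlib.sha1).hexdigest()
# hmac.compare_digest is the timing-safe way to compare two digests.
print(hmac.compare_digest(auth_code, auth_code))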
Example #35
def decode_wire_protocol(message):
    """ http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol """
    _, msg_id, _, opcode, _ = struct.unpack('<iiiii', message[:20])
    op = MONGO_OPS.get(opcode, 'unknown')
    zidx = 20
    collection_name_size = message[zidx:].find('\0')
    collection_name = message[zidx:zidx + collection_name_size]
    zidx += collection_name_size + 1
    skip, limit = struct.unpack('<ii', message[zidx:zidx + 8])
    zidx += 8
    try:
        msg = bson.decode_all(message[zidx:])
    except InvalidBSON:
        msg = 'invalid bson'
    return {
        'op': op,
        'collection': collection_name,
        'msg_id': msg_id,
        'skip': skip,
        'limit': limit,
        'query': msg,
    }
Example #36
 def row_func(row):
     try:
         row_wrapper.load_row(row)
         return row_function(row_wrapper)
     except Exception as e:
         try:
             e_msg = unicode(e)
         except:
             e_msg = u'<unable to get exception message>'
         try:
             e_row = unicode(bson.decode_all(row)[0]['array'])
         except:
             e_row = u'<unable to get row data>'
         try:
             msg = base64.urlsafe_b64encode(
                 (u'Exception: %s running UDF on row: %s' %
                  (e_msg, e_row)).encode('utf-8'))
         except:
             msg = base64.urlsafe_b64encode(
                 u'Exception running UDF, unable to provide details.'.
                 encode('utf-8'))
         raise IaPyWorkerError(msg)
Example #37
    def fill_db(self, collection_str):
        '''Check if collection is already in MongoDB 
        
        If already in MongoDB:
            Do nothing
        Else:
            Load data into db from quiltdata (karrlab/datanator)

        Args:
            collection_str: name of collection (e.g. 'ecmdb', 'pax', etc)
        '''
        _, _, collection = self.con_db(collection_str)
        if collection.find({}).count() != 0:
            return collection
        else:
            manager = wc_utils.quilt.QuiltManager(path=self.cache_dirname,
                                                  package='datanator')
            filename = collection_str + '.bson'
            manager.download_package(filename)
            with open((self.cache_dirname + '/' + filename), 'rb') as f:
                collection.insert(decode_all(f.read()))
            return collection
Example #38
 def test_basic_decode(self):
     self.assertEqual({"test": u("hello world")},
                      BSON(b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74\x00\x0C"
                           b"\x00\x00\x00\x68\x65\x6C\x6C\x6F\x20\x77\x6F"
                           b"\x72\x6C\x64\x00\x00").decode())
     self.assertEqual([{"test": u("hello world")}, {}],
                      decode_all(b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
                                 b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
                                 b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
                                 b"\x05\x00\x00\x00\x00"))
     self.assertEqual([{"test": u("hello world")}, {}],
                      list(decode_iter(
                         b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
                         b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
                         b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
                         b"\x05\x00\x00\x00\x00")))
     self.assertEqual([{"test": u("hello world")}, {}],
                      list(decode_file_iter(StringIO(
                         b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
                         b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
                         b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
                         b"\x05\x00\x00\x00\x00"))))
Example #39
    async def test_iter_cursor(self):
        collection = self.collection
        await collection.delete_many({})

        for n_docs in 0, 1, 2, 10:
            if n_docs:
                docs = [{'_id': i} for i in range(n_docs)]
                await collection.insert_many(docs)

            # Force extra batches to test iteration.
            j = 0
            async for doc in collection.find().sort('_id').batch_size(3):
                self.assertEqual(j, doc['_id'])
                j += 1

            self.assertEqual(j, n_docs)

            j = 0
            raw_cursor = collection.find_raw_batches().sort('_id').batch_size(3)
            async for batch in raw_cursor:
                j += len(bson.decode_all(batch))

            await collection.delete_many({})
Example #40
def parse_reply(data, length):
    #     struct {
    #     MsgHeader header;         // standard message header
    #     int32     responseFlags;  // bit vector - see details below
    #     int64     cursorID;       // cursor id if client needs to do get more's
    #     int32     startingFrom;   // where in the cursor this reply is starting
    #     int32     numberReturned; // number of documents in the reply
    #     document* documents;      // documents
    # }
    flags, pos = bson._get_int(data, 16)
    cursor_id, pos = bson._get_long(data, pos, as_class=None, tz_aware=False, uuid_subtype=3)
    starting_from, pos = bson._get_int(data, pos)
    number_returned, pos = bson._get_int(data, pos)
    try:
        o = bson.decode_all(data[pos:length])
    except Exception as e:
        o = []
        logger.exception("exception on bson decode in parse_reply")
        logger.info(repr(data[pos:length]))

    return Operation(operation=OP_REPLY, cursor_id=cursor_id,
                     starting_from=starting_from, number_returned=number_returned,
                     documents=o)
Example #41
def parse_pinterest(**kwargs):
    if os.path.isfile(BIN_DATA[PINTEREST]):
        LOG.info("Already processed, skipping.")
        return

    data_file = 'subset_iccv_board_pins.bson'
    source_file = os.path.join(DOWNLOAD[PINTEREST], data_file)
    if not glob(source_file):
        raise Exception("Cannot find pinterest dataset")

    LOG.info("Parsing pinterest")

    with open(source_file, 'rb') as f:
        bsob = bson.decode_all(f.read())

    map_id_pin = dict()
    map_pin_id = dict()
    map_board_id = dict()
    map_id_board = dict()
    pins = 0

    board_pin_pairs = []
    for i, board in enumerate(bsob):
        map_id_board[i] = board
        map_board_id[board['board_id']] = i
        for pin in board['pins']:
            if (pin not in map_pin_id):
                map_pin_id[pin] = pins
                map_id_pin[pins] = pin
                pins += 1
            board_pin_pairs.append((map_board_id[board['board_id']], map_pin_id[pin]))
    boards = [board for (board, pin) in board_pin_pairs]
    pins = [pin for (board, pin) in board_pin_pairs]

    m_sp = sp.csr_matrix(([1] * len(boards), (boards, pins)), shape=(len(map_board_id), len(map_pin_id)))

    save_as_npz(m_sp, BIN_DATA[PINTEREST])
Example #42
def _mongodb_decode_wire_protocol(message):
    """ http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol """
    MONGO_OPS = {
        1000: 'msg',
        2001: 'update',
        2002: 'insert',
        2003: 'reserved',
        2004: 'query',
        2005: 'get_more',
        2006: 'delete',
        2007: 'kill_cursors',
    }
    _, msg_id, _, opcode, _ = struct.unpack('<iiiii', message[:20])
    op = MONGO_OPS.get(opcode, 'unknown')
    zidx = 20
    collection_name_size = message[zidx:].find('\0')
    collection_name = message[zidx:zidx + collection_name_size]
    if '.system.' in collection_name:
        return
    zidx += collection_name_size + 1
    skip, limit = struct.unpack('<ii', message[zidx:zidx + 8])
    zidx += 8
    msg = ""
    try:
        if message[zidx:]:
            msg = bson.decode_all(message[zidx:])
    except:
        msg = 'invalid bson'
    return {
        'op': op,
        'collection': collection_name,
        'msg_id': msg_id,
        'skip': skip,
        'limit': limit,
        'query': msg
    }
Example #43
def convert_tags(file_name='papers.bson'):
    """
    Generates all the tags in the first run
    """
    file_name = f"{base_dir}/{file_name}"
    print("\n\nConverting tags")

    all_tags = set()
    count = 0

    # Making a list of all tags to be added
    with open(file_name, 'rb') as f:
        for doc in bson.decode_all(f.read()):
            if count % 1000 == 0:
                print(
                    f'{count} papers scanned, total {len(all_tags)} found so far'
                )

            tags = get_tags(doc)

            for tag_name in tags:
                # For the time being we ignore non-arxiv tags.
                # ArXiv tags are always of the form archive.subject (https://arxiv.org/help/arxiv_identifier)
                if not re.match('[A-Za-z\\-]+\\.[A-Za-z\\-]+', tag_name):
                    continue

                all_tags.add(tag_name)

            count += 1

    print(f'Total {len(all_tags)} unique tags')

    for tag_name in all_tags:
        tag = Tag(name=tag_name, source='arXiv')
        db.session.add(tag)
    db.session.commit()
Example #44
def convert_tweets(papers_map, file_name='tweets.bson'):
    file_name = f"{base_dir}/{file_name}"
    print("\n\nConverting tweets")
    # Ex
    # {'_id': '1000018920986808328', 'pids': ['1804.03984'], 'inserted_at_date': datetime.datetime(2020, 5, 1, 23, 46, 44, 341000), 'created_at_date': datetime.datetime(2018, 5, 25, 14, 21, 4), 'created_at_time': 1527258064.0, 'lang': 'en', 'text': 'Coolest part of @aggielaz et al\'s most recent emergent communication paper: when agents jointly learn "conceptual" reprs alongside communication protocol, these concepts are heavily biased by the natural statistics of the environment. https://t.co/K1X6ZSwH3G https://t.co/2eqav3ax6g', 'retweets': 2, 'likes': 5, 'replies': 0, 'user_screen_name': 'j_gauthier', 'user_name': 'Jon Gauthier', 'user_followers_count': 4304, 'user_following_count': 457}
    with open(file_name, 'rb') as f:
        count = 0
        docs = bson.decode_all(f.read())
        total_tweets = len(docs)
        tweets = []
        for doc in docs:
            count += 1
            if count % 3000 == 0:
                print(f'{count}/{total_tweets} tweets parsed')
                db.session.bulk_save_objects(tweets)
                db.session.commit()
                tweets = []

            tweet = create_tweet(doc, papers_map)
            if tweet:
                tweets.append(tweet)

        db.session.bulk_save_objects(tweets)
        db.session.commit()
Example #45
 def restoreDump(self, file, collectionName):
     client = MongoClient(MONGODB_URI)
     db = client[MGI_DB]
     target_collection = db[collectionName]
     with open(file, 'rb') as f:
         dump = f.read()
     target_collection.insert(decode_all(dump))
Example #46
    def test_legacy_csharp_uuid(self):
        if not should_test_uuid:
            raise SkipTest("No uuid module")

        # Generated by the .net driver
        from_csharp = b('ZAAAABBfaWQAAAAAAAVuZXdndWlkABAAAAAD+MkoCd/Jy0iYJ7Vhl'
                        'iF3BAJuZXdndWlkc3RyaW5nACUAAAAwOTI4YzlmOC1jOWRmLTQ4Y2'
                        'ItOTgyNy1iNTYxOTYyMTc3MDQAAGQAAAAQX2lkAAEAAAAFbmV3Z3V'
                        'pZAAQAAAAA9MD0oXQe6VOp7mK4jkttWUCbmV3Z3VpZHN0cmluZwAl'
                        'AAAAODVkMjAzZDMtN2JkMC00ZWE1LWE3YjktOGFlMjM5MmRiNTY1A'
                        'ABkAAAAEF9pZAACAAAABW5ld2d1aWQAEAAAAAPRmIO2auc/Tprq1Z'
                        'oQ1oNYAm5ld2d1aWRzdHJpbmcAJQAAAGI2ODM5OGQxLWU3NmEtNGU'
                        'zZi05YWVhLWQ1OWExMGQ2ODM1OAAAZAAAABBfaWQAAwAAAAVuZXdn'
                        'dWlkABAAAAADISpriopuTEaXIa7arYOCFAJuZXdndWlkc3RyaW5nA'
                        'CUAAAA4YTZiMmEyMS02ZThhLTQ2NGMtOTcyMS1hZWRhYWQ4MzgyMT'
                        'QAAGQAAAAQX2lkAAQAAAAFbmV3Z3VpZAAQAAAAA98eg0CFpGlPihP'
                        'MwOmYGOMCbmV3Z3VpZHN0cmluZwAlAAAANDA4MzFlZGYtYTQ4NS00'
                        'ZjY5LThhMTMtY2NjMGU5OTgxOGUzAAA=')

        data = base64.b64decode(from_csharp)

        # Test decoding
        docs = bson.decode_all(data, SON, False, OLD_UUID_SUBTYPE)
        for d in docs:
            self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        docs = bson.decode_all(data, SON, False, UUID_SUBTYPE)
        for d in docs:
            self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        docs = bson.decode_all(data, SON, False, JAVA_LEGACY)
        for d in docs:
            self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        docs = bson.decode_all(data, SON, False, CSHARP_LEGACY)
        for d in docs:
            self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        # Test encoding
        encoded = b('').join([
            bson.BSON.encode(doc, uuid_subtype=OLD_UUID_SUBTYPE)
            for doc in docs
        ])
        self.assertNotEqual(data, encoded)

        encoded = b('').join(
            [bson.BSON.encode(doc, uuid_subtype=UUID_SUBTYPE) for doc in docs])
        self.assertNotEqual(data, encoded)

        encoded = b('').join(
            [bson.BSON.encode(doc, uuid_subtype=JAVA_LEGACY) for doc in docs])
        self.assertNotEqual(data, encoded)

        encoded = b('').join([
            bson.BSON.encode(doc, uuid_subtype=CSHARP_LEGACY) for doc in docs
        ])
        self.assertEqual(data, encoded)

        # Test insert and find
        client = get_client()
        client.pymongo_test.drop_collection('csharp_uuid')
        coll = client.pymongo_test.csharp_uuid
        coll.uuid_subtype = CSHARP_LEGACY

        coll.insert(docs)
        self.assertEqual(5, coll.count())
        for d in coll.find():
            self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        coll.uuid_subtype = OLD_UUID_SUBTYPE
        for d in coll.find():
            self.assertNotEqual(d['newguid'], d['newguidstring'])
        client.pymongo_test.drop_collection('csharp_uuid')
Example #47
    def test_legacy_java_uuid(self):
        if not should_test_uuid:
            raise SkipTest("No uuid module")

        # Generated by the Java driver
        from_java = b('bAAAAAdfaWQAUCBQxkVm+XdxJ9tOBW5ld2d1aWQAEAAAAAMIQkfACFu'
                      'Z/0RustLOU/G6Am5ld2d1aWRzdHJpbmcAJQAAAGZmOTk1YjA4LWMwND'
                      'ctNDIwOC1iYWYxLTUzY2VkMmIyNmU0NAAAbAAAAAdfaWQAUCBQxkVm+'
                      'XdxJ9tPBW5ld2d1aWQAEAAAAANgS/xhRXXv8kfIec+dYdyCAm5ld2d1'
                      'aWRzdHJpbmcAJQAAAGYyZWY3NTQ1LTYxZmMtNGI2MC04MmRjLTYxOWR'
                      'jZjc5Yzg0NwAAbAAAAAdfaWQAUCBQxkVm+XdxJ9tQBW5ld2d1aWQAEA'
                      'AAAAPqREIbhZPUJOSdHCJIgaqNAm5ld2d1aWRzdHJpbmcAJQAAADI0Z'
                      'DQ5Mzg1LTFiNDItNDRlYS04ZGFhLTgxNDgyMjFjOWRlNAAAbAAAAAdf'
                      'aWQAUCBQxkVm+XdxJ9tRBW5ld2d1aWQAEAAAAANjQBn/aQuNfRyfNyx'
                      '29COkAm5ld2d1aWRzdHJpbmcAJQAAADdkOGQwYjY5LWZmMTktNDA2My'
                      '1hNDIzLWY0NzYyYzM3OWYxYwAAbAAAAAdfaWQAUCBQxkVm+XdxJ9tSB'
                      'W5ld2d1aWQAEAAAAAMtSv/Et1cAQUFHUYevqxaLAm5ld2d1aWRzdHJp'
                      'bmcAJQAAADQxMDA1N2I3LWM0ZmYtNGEyZC04YjE2LWFiYWY4NzUxNDc'
                      '0MQAA')

        data = base64.b64decode(from_java)

        # Test decoding
        docs = bson.decode_all(data, SON, False, OLD_UUID_SUBTYPE)
        for d in docs:
            self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        docs = bson.decode_all(data, SON, False, UUID_SUBTYPE)
        for d in docs:
            self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        docs = bson.decode_all(data, SON, False, CSHARP_LEGACY)
        for d in docs:
            self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        docs = bson.decode_all(data, SON, False, JAVA_LEGACY)
        for d in docs:
            self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        # Test encoding
        encoded = b('').join([
            bson.BSON.encode(doc, uuid_subtype=OLD_UUID_SUBTYPE)
            for doc in docs
        ])
        self.assertNotEqual(data, encoded)

        encoded = b('').join(
            [bson.BSON.encode(doc, uuid_subtype=UUID_SUBTYPE) for doc in docs])
        self.assertNotEqual(data, encoded)

        encoded = b('').join([
            bson.BSON.encode(doc, uuid_subtype=CSHARP_LEGACY) for doc in docs
        ])
        self.assertNotEqual(data, encoded)

        encoded = b('').join(
            [bson.BSON.encode(doc, uuid_subtype=JAVA_LEGACY) for doc in docs])
        self.assertEqual(data, encoded)

        # Test insert and find
        client = get_client()
        client.pymongo_test.drop_collection('java_uuid')
        coll = client.pymongo_test.java_uuid
        coll.uuid_subtype = JAVA_LEGACY

        coll.insert(docs)
        self.assertEqual(5, coll.count())
        for d in coll.find():
            self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring']))

        coll.uuid_subtype = OLD_UUID_SUBTYPE
        for d in coll.find():
            self.assertNotEqual(d['newguid'], d['newguidstring'])
        client.pymongo_test.drop_collection('java_uuid')
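For reference, the four positional arguments to decode_all above come from the PyMongo 2.x signature; on PyMongo 4.x the same Java-legacy round trip is expressed through CodecOptions. A minimal sketch, assuming `data` is the base64-decoded payload from the test:

from bson import decode_all, encode
from bson.binary import UuidRepresentation
from bson.codec_options import CodecOptions

opts = CodecOptions(uuid_representation=UuidRepresentation.JAVA_LEGACY)
docs = decode_all(data, opts)  # subtype-3 UUIDs decoded with Java byte order
encoded = b"".join(encode(doc, codec_options=opts) for doc in docs)
assert encoded == data  # byte-for-byte round trip, matching the JAVA_LEGACY case above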
Example #48
with open('sondages.json', 'w') as f:
    f.write(dumps(sondages))

from bson import decode_all
from data.reader import BSONInput
'''from pprint import pprint 
for s in ['candidacies','candidates','elections']:
    globals()[s]=decode_all(open('data/'+s+'.bson').read())
    pprint(globals()[s])'''

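# Lookup tables keyed by stringified ObjectId:
#   candidat  : candidate id -> "first_name last_name"
#   candidatn : candidacy id -> candidate display name
#   candidate : candidacy id -> its election id
#   election  : election id  -> election name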
candidatn = {}
candidate = {}
candidat = {}
election = {}
for d in decode_all(open('data/candidates.bson').read()):
    candidat[str(d['_id'])] = (d['first_name'] or '') + ' ' + d['last_name']
for d in decode_all(open('data/candidacies.bson').read()):
    #if d['published']==True:
    candidatn[str(d['_id'])] = candidat[str(d['candidate_ids'][0])]
    candidate[str(d['_id'])] = str(d[u'election_id'])

for d in decode_all(open('data/elections.bson').read()):
    election[str(d['_id'])] = d['name']

tags = {}
tagsname = {}
for d in decode_all(open('data/tags.bson').read()):
    tagsname[str(d['_id'])] = d['name']

from collections import Counter
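The snippet stops at the Counter import. A hypothetical continuation (not in the original) that tallies candidacies per election from the dicts just built:

per_election = Counter(election[eid] for eid in candidate.values() if eid in election)
print(per_election.most_common(10))  # ten elections with the most candidacies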
Example #49
async def validate_action(redis_conn, mongo, mysql_pool, action, data):
    result = 'Empty'

    if action == 'get_cookie':
        result = await redis_conn.execute('GET', data)
        if result:
            result = json.loads(result.decode())
        else:
            result = 'Empty'

    elif action == 'get_cookies':
        cur = b'0'
        cookies = []
        while cur:
            cur, keys = await redis_conn.scan(cur, match=data)
            for key in keys:
                cookies.append(key.decode())
        if cookies:
            result = {'cookies': cookies}
        else:
            result = 'Empty'

    elif action == 'create_report':
        try:
            username = data['username']
            email = data['email']
            text = data['text']
        except (TypeError, KeyError):
            return 'username, email, text is required'

        if username == '' or email == '' or text == '':
            return 'some field is empty ;('

        id = ObjectId()

        try:
            await mongo.sufferers.insert_one({
                '_id': id,
                'username': username,
                'email': email
            })
        except Exception as ex:
            result = str(ex)

        # NOTE: a 6-digit numeric key padded to 8 bytes, used with DES in ECB
        # mode, is trivially brute-forceable; presumably intentional here.
        key = ''.join(random.choice(string.digits) for _ in range(6)) + '00'
        des = DES.new(key.encode(), DES.MODE_ECB)  # PyCryptodome expects a bytes key
        padded_text = pad(str(text).encode())
        encrypted_text = des.encrypt(padded_text)

        report_id = ObjectId()
        try:
            await mongo.reports.insert_one({
                '_id': report_id,
                'sufferer_id': ObjectId(id),
                'username': username,
                'encrypted_text': encrypted_text,
                'private_key': key.encode()
            })

            result = {
                'status': 'ok',
                'private_key': key,
                'object_id': str(report_id),
                'encrypted_text': base64.encodebytes(encrypted_text).decode().strip('\n')
            }
        except Exception as ex:
            result = str(ex)

    elif action == 'get_report':
        try:
            id = data
        except:
            return 'data is required'
        try:
            report = await mongo.reports.find_one({'_id': ObjectId(data)})
            encrypted_text = base64.encodebytes(
                report['encrypted_text']).decode().strip('\n')
            result = {
                'report': {
                    'username': report['username'],
                    'encrypted_text': encrypted_text
                }
            }
        except Exception as ex:
            result = str(ex)

    elif action == 'get_reports':
        reports = []
        cursor = mongo.reports.find_raw_batches()
        async for batch in cursor:
            for item in bson.decode_all(batch):
                try:
                    encrypted_text = base64.encodebytes(
                        item['encrypted_text']).decode().strip('\n')
                    reports.append({
                        'username': item['username'],
                        'encrypted_text': encrypted_text
                    })
                except Exception as ex:
                    result = str(ex)
        result = {'reports': reports}

    elif action == 'send_comment':
        try:
            username = data['username']
            comment = data['comment']
            private = data['private']
        except (TypeError, KeyError):
            return 'username, comment, private is required'

        if username == '' or comment == '' or private == '':
            return 'some field is empty ;('

        async with mysql_pool.acquire() as conn:
            async with conn.cursor() as cur:
                try:
                    await cur.execute(
                        "select count(*) from users where username=%s;",
                        data['username'])
                    was_registered, = await cur.fetchone()

                    if not was_registered:
                        await cur.execute(
                            "insert into users (username) values (%s)",
                            data['username'])

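                    # NOTE: unlike the parameterized queries above, this INSERT
                    # interpolates user input via str.format() and is open to SQL
                    # injection (the '******' literal is redacted in the source).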
                    await cur.execute(
                        "insert into comments (private, text, author_id) values ({}, '{}', (select user_id from users where username='******'))"
                        .format(data['private'], data['comment'],
                                data['username']))

                    await conn.commit()

                    result = {'ok': 'comment sent'}
                except Exception as ex:
                    result = str(ex)

    elif action == 'get_comments':
        async with mysql_pool.acquire() as conn:
            async with conn.cursor() as cur:
                await cur.execute(
                    "select case private when (username) then username else 'anonymous' end text, "
                    "case private when (username) then text else 'private comment' end text "
                    "from comments inner join users on author_id=user_id limit 100"
                )
                result = await cur.fetchall()

    elif action == 'get_my_comments':
        async with mysql_pool.acquire() as conn:
            async with conn.cursor() as cur:
                await cur.execute(
                    'select user_id from users where username = %s', data)
                user_id = await cur.fetchone()
                await cur.execute(
                    'select text from comments where author_id = %s', user_id)
                result = await cur.fetchall()

    return result
Example #50
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.data import imread
import json
import bson
import io
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from keras.utils import np_utils

with open('train_example.bson', 'rb') as f:
    bs = f.read()  # reads bytes from the specified file
docs = bson.decode_all(bs)  # one dict per BSON document
data = pd.DataFrame(docs)
data.head()

#Printing sample images for each category
for i in range(5):
    picture = imread(io.BytesIO(data.imgs[i][0]['picture']))
    plt.figure()
    plt.imshow(picture)

X = []
for i in range(data.shape[0]):
    X.append(imread(io.BytesIO(data.imgs[i][0]['picture'])))

X = np.array(X, dtype=np.float32) / 255.
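decode_all above materialises every document at once. A sketch of the streaming alternative with decode_file_iter, which matters once the dump no longer fits in memory (the full competition file is far larger than train_example.bson):

import bson

with open('train_example.bson', 'rb') as fh:
    for doc in bson.decode_file_iter(fh):
        # each doc exposes '_id', 'category_id' and its 'imgs' list;
        # handle_doc is a placeholder for your own processing
        handle_doc(doc)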
Example #51
    def _upgradeAgent(self, newAgentVersion):
        """ Pull down the files, verify  and then stop the current process """

        res = urllib.request.urlopen(self.settings.upgrade_url %
                                     {'key': self.settings.mms_key})

        resBson = None
        try:
            resBson = bson.decode_all(res.read())
        finally:
            if res is not None:
                res.close()
                res = None

        if len(resBson) != 1:
            return

        upgradeResponse = resBson[0]

        if 'status' not in upgradeResponse or upgradeResponse[
                'status'] != 'ok' or 'files' not in upgradeResponse:
            return

        # Verify the auth codes for all files and names first.
        for fileInfo in upgradeResponse['files']:
            if fileInfo['fileAuthCode'] != hmac.new(
                    self.settings.secret_key,
                    fileInfo['file'],
                    digestmod=hashlib.sha1).hexdigest():
                self.logger.error(
                    'Invalid file auth code for upgrade - cancelling')
                return

            if fileInfo['fileNameAuthCode'] != hmac.new(
                    self.settings.secret_key,
                    fileInfo['fileName'],
                    digestmod=hashlib.sha1).hexdigest():
                self.logger.error(
                    'Invalid file name auth code for upgrade - cancelling')
                return

        # Write the files.
        for fileInfo in upgradeResponse['files']:

            fileContent = fileInfo['file']
            fileName = fileInfo['fileName']

            # If the user has a global username/password defined, make sure it is set in the new settings.py file.
            if fileName == 'settings.py' and getattr(
                    self.settings,
                    'globalAuthUsername', None) is not None and getattr(
                        self.settings, 'globalAuthPassword', None) is not None:
                fileContent = fileContent.replace(
                    'globalAuthPassword = None',
                    'globalAuthPassword=%r' % self.settings.globalAuthPassword)
                fileContent = fileContent.replace(
                    'globalAuthUsername = None',
                    'globalAuthUsername=%r' % self.settings.globalAuthUsername)

            fileSystemName = os.path.join(self.agentDir, fileName)
            newFile = open(fileSystemName, 'w')

            try:
                newFile.write(fileContent)
            finally:
                if newFile is not None:
                    newFile.close()

        # Stop the current agent process
        try:
            self.processContainer.stopAgentProcess()
            self.settings.settingsAgentVersion = newAgentVersion
            self.logger.info(
                'Agent upgraded to version: ' + newAgentVersion +
                ' - there is up to a five minute timeout before data will be sent again'
            )
        except Exception:
            self.logger.error('Problem restarting agent process: ' +
                              traceback.format_exc())
Example #52
def decode_documents(buffer, start, content_size):
    docs = bson.decode_all(buffer[start:start + content_size], CODEC_OPTIONS)
    return docs, start + content_size
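A hypothetical call, assuming CODEC_OPTIONS is defined alongside the helper and `body` holds one or more concatenated BSON documents:

docs, next_offset = decode_documents(body, 0, len(body))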
Example #53
    def handle_io_in(self, data):
        offset = 0
        while len(data) - offset >= 16:
            h = packets.MsgHeader(data[offset:offset+16])
            # print(h.messageLength)
            # print(h.opCode)
            if len(data) - offset < h.messageLength:
                break
            if h.opCode == 2004:
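                # 2004 = OP_QUERY in the legacy MongoDB wire protocol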
                msg = packets.MsgQuery(data[offset+16:offset+h.messageLength])
                # print(h.show())
                # print(msg.show())
                query = None
                field_selectors = []
                if bson:
                    for doc in bson.decode_all(msg.payload.load):
                        if query is None:
                            query = doc
                        else:
                            field_selectors.append(doc)
                res = self._handle_query(fullCollectionName=msg.fullCollectionName, query=query, field_selectors=field_selectors)

                # print(msg)
                # print(msg.payload)
                # print(msg.payload.load)
                payload = b""
                for doc in res:
                    payload += bson.BSON.encode(doc)

                pkg = packets.MsgHeader(
                    responseTo=h.requestID,
                    opCode=1
                ) / packets.MsgReply(
                    numberReturned=len(res)
                ) / Raw(payload)
                pkg.show()
                self.send(pkg.build())
            elif h.opCode == 2010:
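                # 2010 = OP_COMMAND; the reply is sent as 2011 = OP_COMMANDREPLY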
                msg = packets.MsgCommand(data[offset + 16:offset + h.messageLength])

                docs = bson.decode_all(msg.payload.load)
                res = self._handle_command(msg.database, msg.commandName, docs[0], docs[1], docs[1:])

                payload = b""
                for doc in res:
                    payload += bson.BSON.encode(doc)

                pkg = packets.MsgHeader(
                    responseTo=h.requestID,
                    opCode=2011
                ) / packets.MsgCommandReply(
                ) / Raw(payload)
                pkg.show()
                self.send(pkg.build())

            # print(h.payload)

            # ToDo: check length
            offset = offset + h.messageLength

        return offset
Example #54
def ifilter(predicate, iterable):
    """Filter records and return decoded object so that batch processing can work correctly"""
    return (numpy_to_bson_friendly(bson.decode_all(item)[0]["array"])
            for item in iterable if predicate(item))
Example #55
def ifilterfalse(predicate, iterable):
    """Filter records that do not match predicate and return decoded object so that batch processing can encode"""
    return (numpy_to_bson_friendly(bson.decode_all(item)[0]["array"])
            for item in iterable if not predicate(item))
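An illustrative call for these two helpers (names hypothetical): each item is a BSON blob wrapping one row under the 'array' key, so a predicate typically decodes the blob itself.

positives = ifilter(
    lambda item: bson.decode_all(item)[0]['array'][0] > 0,  # keep rows whose first column is positive
    encoded_rows)  # encoded_rows: an iterable of BSON blobs (assumed)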
Example #56
import io
import warnings

import numpy as np
import pandas as pd
import bson
import tensorflow as tf
import matplotlib.pyplot as plt
from skimage.data import imread  # or, whatever image library you prefer
from sklearn import preprocessing
from subprocess import check_output

#Output the files in the category
print(check_output(["ls", "./input"]).decode("utf8"))
#Ignore Warnings
warnings.filterwarnings("ignore")

# Simple data processing
data = bson.decode_file_iter(open('./input/train_example.bson', 'rb'))
# read bson file into pandas DataFrame
with open('./input/train_example.bson', 'rb') as b:
    df = pd.DataFrame(bson.decode_all(b.read()))

#Get shape of first image
for e, pic in enumerate(df['imgs'][0]):
    picture = imread(io.BytesIO(pic['picture']))
    pix_x, pix_y, rgb = picture.shape

n = len(df.index)  #rows of data in train set
X_ids = np.zeros((n, 1)).astype(int)
Y = np.zeros((n, 1)).astype(int)  #category_id for each row
X_images = np.zeros((n, pix_x, pix_y, rgb))  #m images are 180 by 180 by 3

for c, d in enumerate(data):
    X_ids[c] = d['_id']
    Y[c] = d['category_id']
Example #57
    def load_row(self, data):
        self._set_data(bson.decode_all(data)[0]['array'])
Example #58
def loadData(filename):
    """Read a BSON dump file and return the list of decoded documents."""
    with open(filename, 'rb') as b_file:
        bs = b_file.read()
    return bson.decode_all(bs)  # a list of dicts, one per document
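Example call (path hypothetical): every document in the dump becomes one dict in the returned list.

docs = loadData('dump/mydb/mycollection.bson')
print(len(docs), list(docs[0]) if docs else 'empty dump')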
Example #59
from bson import BSON
from bson import decode_all
from pymongo import MongoClient

try:
    client = MongoClient('mongodb://*****:*****@server:27017/database',
                         ssl=True,
                         ssl_certfile='client.pem',
                         ssl_ca_certs='ca.pem'
                         )
    db = client.database  # DATABASE
    retrieve_request = db.Collection  # COLLECTION


    with open('Path to your backup.bson', 'rb') as f:
        retrieve_request.insert(decode_all(f.read()))

    client.close()
except Exception as e:
    print(str(e))
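Collection.insert was deprecated in PyMongo 3.0 and removed in 4.0; a sketch of the same restore with the bulk API (same placeholder path as above):

with open('Path to your backup.bson', 'rb') as f:
    docs = decode_all(f.read())
if docs:
    retrieve_request.insert_many(docs)  # ordered bulk insert of the decoded documents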
Example #60
    def _pullRemoteConf(self):
        """ Pull the remote configuration data """

        uniqueHostnames = []

        res = None

        try:

            res = urllib2.urlopen(self.confUrl)

            resBson = None
            try:
                resBson = bson.decode_all(res.read())
            finally:
                if res is not None:
                    res.close()
                    res = None

            if len(resBson) != 1:
                return

            confResponse = resBson[0]

            if 'hosts' not in confResponse:
                self.mmsAgent.stopAll()
                return

            if 'disableDbstats' in confResponse:
                self.mmsAgent.disableDbstats = confResponse['disableDbstats']
            else:
                self.mmsAgent.disableDbstats = False

            hosts = confResponse['hosts']

            self.mmsAgent.serverHostDefsLock.acquire()
            try:
                # Extract the host information
                if hosts is not None:
                    for host in hosts:

                        hostDef, hostDefLast = self.mmsAgent.extractHostDef(
                            host)

                        hostKey = hostDef['hostKey']
                        uniqueHostnames.append(hostKey)

                        if hostKey not in self.mmsAgent.serverHostDefs:
                            self.mmsAgent.startMonitoringThreads(hostDef)
                        else:
                            self.mmsAgent.checkChangedHostDef(
                                hostDef, hostDefLast)

                        hostDef = None
                        hostDefLast = None

                # Check to see if anything was removed
                for hostDef in self.mmsAgent.serverHostDefs.values():
                    if hostDef['hostKey'] not in uniqueHostnames:
                        self.mmsAgent.stopAndClearHost(hostDef['hostKey'])
            finally:
                self.mmsAgent.serverHostDefsLock.release()

        except Exception:
            if res is not None:
                try:
                    res.close()
                    res = None
                except:
                    pass

            self.logger.warning(
                "Problem pulling configuration data from MMS (check firewall and network): "
                + traceback.format_exc())