示例#1
0
def migrate(muri, dbname, huri):
    """Migrate data from MongoDB (muri) to HDFS (huri).

    Documents returned by the MongoDB storage for the query
    ``{'stype': mstg.stype}`` are written to HDFS, read back and compared
    one-to-one with the originals. Only when every document matches is the
    'stype' attribute of the migrated MongoDB documents set to the HDFS
    storage type.

    :param muri: URI handed to MongoStorage
    :param dbname: database name handed to MongoStorage
    :param huri: URI handed to HdfsStorage

    The function returns None. If the cross-check fails (different number
    of documents or different content) an ERROR line is printed and the
    process is terminated via ``sys.exit(1)``.
    """
    mstg = MongoStorage(muri, dbname)
    hstg = HdfsStorage(huri)

    # read data from MongoDB
    mdocs = mstg.read({'stype': mstg.stype})

    # do nothing if no documents are found
    if not mdocs:
        return

    # collect ids up-front: the cross-check below strips 'wmaid' from the docs
    mids = [d['wmaid'] for d in mdocs]

    # store data to HDFS
    wmaid = hstg.write(mdocs)

    # read data back from HDFS; materialise it so its size can be checked
    hdocs = list(hstg.read(wmaid))

    # zip() silently stops at the shorter input, so a short (or long) read
    # from HDFS would otherwise pass the cross-check unnoticed
    if len(mdocs) != len(hdocs):
        print("ERROR", "number of documents differs", len(mdocs), len(hdocs))
        sys.exit(1)

    # now we can compare MongoDB docs with HDFS docs, a la cross-check
    for mdoc, hdoc in zip(mdocs, hdocs):
        # drop WMArchive keys, they are not part of the payload comparison
        for key in ('stype', 'wmaid'):
            mdoc.pop(key, None)
            hdoc.pop(key, None)
        if mdoc != hdoc:
            print("ERROR", mdoc, hdoc)
            sys.exit(1)

    # update status attributes of docs in MongoDB
    query = {'$set': {'stype': hstg.stype}}
    mstg.update(mids, query)
示例#2
0
def migrate(muri, huri):
    """Migrate data from MongoDB (muri) to HDFS (huri).

    Documents returned by the MongoDB storage for the query
    ``{'stype': mstg.stype}`` are written to HDFS, read back and compared
    one-to-one with the originals. Only when every document matches is the
    'stype' attribute of the migrated MongoDB documents set to the HDFS
    storage type.

    :param muri: URI handed to MongoStorage
    :param huri: URI handed to HdfsStorage

    The function returns None. If the cross-check fails (different number
    of documents or different content) an ERROR line is printed and the
    process is terminated via ``sys.exit(1)``.
    """
    mstg = MongoStorage(muri)
    hstg = HdfsStorage(huri)

    # read data from MongoDB
    mdocs = mstg.read({'stype': mstg.stype})

    # do nothing if no documents are found
    if not mdocs:
        return

    # collect ids up-front: the cross-check below strips 'wmaid' from the docs
    mids = [d['wmaid'] for d in mdocs]

    # store data to HDFS
    wmaid = hstg.write(mdocs)

    # read data back from HDFS; materialise it so its size can be checked
    hdocs = list(hstg.read(wmaid))

    # zip() silently stops at the shorter input, so a short (or long) read
    # from HDFS would otherwise pass the cross-check unnoticed
    if len(mdocs) != len(hdocs):
        print("ERROR", "number of documents differs", len(mdocs), len(hdocs))
        sys.exit(1)

    # now we can compare MongoDB docs with HDFS docs, a la cross-check
    for mdoc, hdoc in zip(mdocs, hdocs):
        # drop WMArchive keys, they are not part of the payload comparison
        for key in ('stype', 'wmaid'):
            mdoc.pop(key, None)
            hdoc.pop(key, None)
        if mdoc != hdoc:
            print("ERROR", mdoc, hdoc)
            sys.exit(1)

    # update status attributes of docs in MongoDB
    query = {'$set': {'stype': hstg.stype}}
    mstg.update(mids, query)
示例#3
0
class MongoStorageTest(unittest.TestCase):
    """Write/read round-trip test for MongoStorage.

    The MongoDB URI is taken from the WMA_MONGODB environment variable and
    falls back to mongodb://localhost:8230. When the storage cannot be set
    up, a warning is printed and the test body and clean-up become no-ops.
    """

    def setUp(self):
        "Set up the storage manager and a sample document"
        uri = os.environ.get("WMA_MONGODB", "mongodb://localhost:8230")
        self.dbname = "test_fwjr"
        try:
            self.mgr = MongoStorage(uri, dbname=self.dbname)
            self.mgr.remove()
        except Exception:
            # best effort: keep going without a manager, but do not swallow
            # KeyboardInterrupt/SystemExit as a bare except would
            self.mgr = None
            print("WARNING: cannot connect to %s" % uri)
        data = {
            "int": 1,
            "float": 1.2,
            "list": [1, 2, 3],
            "dict": {"dname": "foo", "dval": 1},
            "listdict": [{"lname": "foo"}],
            "str": "string",
        }
        # shallow copy taken before the WMArchive keys are added; this is
        # what a stored record is compared against in test_write
        self.bare_data = dict(data)
        data["wmaid"] = wmaHash(data)
        data["stype"] = "mongodb"
        self.data = data

    def tearDown(self):
        "Remove test documents and drop the test database"
        # setUp leaves self.mgr as None when the storage is unavailable
        if not self.mgr:
            return
        self.mgr.remove()
        self.mgr.dropdb(self.dbname)

    def test_write(self):
        "Test write functionality"
        if not self.mgr:
            return
        wmaids = self.mgr.write(self.data)
        self.assertEqual(len(wmaids), 1)

        # full record, compared without the WMArchive bookkeeping keys
        record = self.mgr.read(wmaids[0])[0]
        for key in ("wmaid", "stype"):
            record.pop(key, None)
        self.assertEqual(record, self.bare_data)

        # read again passing ["dict"] as second argument
        # NOTE(review): presumably a list of fields to return; confirm
        # against MongoStorage.read
        data = self.mgr.read(wmaids[0], ["dict"])
        self.assertEqual(1, len(data))
        self.assertEqual(data[0]["dict"], self.bare_data["dict"])
示例#4
0
def migrate(muri):
    """Write one test document to MongoDB and print what is read back.

    :param muri: URI handed to MongoStorage (database 'test_fwjr')

    Every document returned for an empty query is printed; the loop stops
    at the first document that carries an '_id' key.
    """
    storage = MongoStorage(muri, dbname='test_fwjr')
    storage.write([{"test": 1, 'wmaid': 1}])

    # read data from MongoDB using an empty query
    for record in storage.read({}):
        print(record)
        if '_id' not in record:
            continue
        print("found _id in doc")
        print(record)
        break
示例#5
0
def migrate(muri):
    """Store a single test document in MongoDB, then dump the read results.

    :param muri: URI handed to MongoStorage (database 'test_fwjr')

    Documents returned for an empty query are printed one by one until a
    document containing an '_id' key has been reported.
    """
    mongo = MongoStorage(muri, dbname='test_fwjr')
    payload = [{"test": 1, 'wmaid': 1}]
    mongo.write(payload)

    # read data from MongoDB
    everything = {}
    found = False
    for item in mongo.read(everything):
        print(item)
        found = '_id' in item
        if found:
            print("found _id in doc")
            print(item)
        if found:
            break