示例#1
0
def migrate(muri, dbname, huri):
    """Migrate data from MongoDB (muri) to HDFS (huri).

    ``muri`` and ``dbname`` are handed to ``MongoStorage``; ``huri`` is handed
    to ``HdfsStorage``. Documents whose ``stype`` equals the MongoDB storage's
    ``stype`` are read, written to HDFS, read back and compared against the
    originals. Only if every document matches are the MongoDB records updated
    to carry the HDFS storage's ``stype``.

    Returns ``None``. If nothing matches the query the function returns
    without touching HDFS. On any cross-check failure (differing content or
    a differing number of documents) an error is printed and the process is
    terminated with ``sys.exit(1)``.
    """
    # function-scope import so the block does not depend on file-level imports
    from itertools import zip_longest

    mstg = MongoStorage(muri, dbname)
    hstg = HdfsStorage(huri)

    # read data from MongoDB
    mdocs = mstg.read({'stype': mstg.stype})

    # do nothing if no documents are found
    if not len(mdocs):
        return

    # remember the ids now: the cross-check below strips 'wmaid' from the docs
    mids = [doc['wmaid'] for doc in mdocs]

    # store data to HDFS and read it back
    wmaid = hstg.write(mdocs)
    hdocs = hstg.read(wmaid)

    # Cross-check MongoDB docs against HDFS docs. zip_longest (rather than zip)
    # is used so that a missing or extra document on either side is detected
    # instead of being silently ignored.
    missing = object()
    for mdoc, hdoc in zip_longest(mdocs, hdocs, fillvalue=missing):
        if mdoc is missing or hdoc is missing:
            print("ERROR", "number of documents differs between MongoDB and HDFS")
            sys.exit(1)
        # drop WMArchive keys before comparing the payloads
        for key in ('stype', 'wmaid'):
            mdoc.pop(key, None)
            hdoc.pop(key, None)
        if mdoc != hdoc:
            print("ERROR", mdoc, hdoc)
            sys.exit(1)

    # update status attributes of docs in MongoDB
    mstg.update(mids, {'$set': {'stype': hstg.stype}})
示例#2
0
def migrate(muri, huri):
    """Migrate data from MongoDB (muri) to HDFS (huri).

    Documents whose ``stype`` equals the MongoDB storage's ``stype`` are read,
    written to HDFS, read back and compared against the originals. Only if
    every document matches are the MongoDB records updated to carry the HDFS
    storage's ``stype``.

    Returns ``None``. If nothing matches the query the function returns
    without touching HDFS. On any cross-check failure (differing content or
    a differing number of documents) an error is printed and the process is
    terminated with ``sys.exit(1)``.
    """
    # function-scope import so the block does not depend on file-level imports
    from itertools import zip_longest

    mstg = MongoStorage(muri)
    hstg = HdfsStorage(huri)

    # read data from MongoDB
    query = {'stype': mstg.stype}
    mdocs = mstg.read(query)

    # do nothing if no documents are found
    if not len(mdocs):
        return

    # collect ids before the cross-check, which removes 'wmaid' from each doc
    mids = [d['wmaid'] for d in mdocs]

    # store data to HDFS
    wmaid = hstg.write(mdocs)

    # read data from HDFS
    hdocs = hstg.read(wmaid)

    # Compare MongoDB docs with HDFS docs. A plain zip() would stop at the
    # shorter sequence and hide lost or extra documents, so pad with a
    # sentinel and treat any padding as a failure.
    absent = object()
    for mdoc, hdoc in zip_longest(mdocs, hdocs, fillvalue=absent):
        if mdoc is absent or hdoc is absent:
            print("ERROR", "number of documents differs between MongoDB and HDFS")
            sys.exit(1)
        # drop WMArchive keys
        for key in ('stype', 'wmaid'):
            mdoc.pop(key, None)
            hdoc.pop(key, None)
        if mdoc != hdoc:
            print("ERROR", mdoc, hdoc)
            sys.exit(1)

    # update status attributes of docs in MongoDB
    query = {'$set': {'stype': hstg.stype}}
    mstg.update(mids, query)
示例#3
0
def write(fin, huri):
    """Write given file into HDFS.

    ``fin`` is the path of a local file whose whole content is read as text.
    ``huri`` is passed to ``HdfsStorage``; the part of ``huri`` after the
    first ':' is used as the destination path given to ``dump``.
    """
    hstg = HdfsStorage(huri)
    # context manager guarantees the local file is closed even if read() fails
    with open(fin) as istream:
        data = istream.read()
    # everything after the first ':' (the whole string if there is no ':')
    path = huri.split(':', 1)[-1]
    hstg.dump(data, path)
示例#4
0
def write(fin, huri):
    """Write given file into HDFS.

    Reads the entire local file ``fin`` as text and hands it to
    ``HdfsStorage(huri).dump`` together with a destination path, which is the
    portion of ``huri`` following its first ':'.
    """
    hstg = HdfsStorage(huri)
    # close the input file deterministically instead of leaking the handle
    with open(fin) as source:
        data = source.read()
    # if huri contains no ':', split() yields one element and path == huri
    path = huri.split(':', 1)[-1]
    hstg.dump(data, path)