# Exemplo n.º 1 (score: 0)
def import_file(json_fname, hdfs_path='/var/metlog/data'):
    '''
    Import a JSON log file into HDFS
    '''
    start_time = datetime.datetime.now()
    try:
        fs = PyHDFS()
        fname = fs.next_filename(hdfs_path)
        with closing(open(json_fname, 'r')) as file_in:
            with closing(fs.open(fname, 'w')) as writer:
                for line in file_in:
                    line = line.strip()
                    writer.append(line)
                print "Complete filesize: ", writer.getLength()
    finally:
        fin_time = datetime.datetime.now()
    delta = fin_time - start_time
# Exemplo n.º 2 (score: 0)
from __future__ import with_statement
from contextlib import closing

from hdfs import PyHDFS

if __name__ == '__main__':
    fs = PyHDFS()

    import datetime
    fname = fs.next_filename('/tmp/var/lock')
    print "Using: %s" % fname
    print "Start: %s" % datetime.datetime.now()
    with closing(open('sample.json.log', 'r')) as file_in:
        with closing(fs.open(fname, 'w')) as writer:
            for line in file_in:
                line = line.strip()
                writer.append(line)
    print "Finished: %s" % datetime.datetime.now()

    with closing(fs.open(fname, 'r')) as reader:
        for syncSeen, key, json_blob in reader:
            pass



# Exemplo n.º 3 (score: 0)
def export_file(hdfs_fname):
    fs = PyHDFS()
    with closing(fs.open(hdfs_fname, 'r')) as reader:
        for syncSeen, key, json_blob in reader:
            print json_blob