Exemplo n.º 1
0
def test():
    """Print every entry listed under ``/files`` followed by its contents.

    Connects to the namenode at 192.168.99.100:9000, lists ``/files`` and,
    for each listing entry, prints the entry itself and then every item that
    ``cat`` produces for the entry's ``'path'``.
    """
    hdfs = Client("192.168.99.100", 9000)
    for entry in hdfs.ls(['/files']):
        print(entry)
        entry_path = entry.get('path')
        # cat yields one iterable per requested path; walk each in turn.
        for stream in hdfs.cat([entry_path]):
            for piece in stream:
                print(piece)
Exemplo n.º 2
0
def metrics():
    """Render text metrics for the HDFS directory named in ``sys.argv[1]``.

    Lists the directory through a ``Client`` connected to ``namenode:8020``,
    streams every listed path with ``cat`` and counts the items yielded by the
    per-file iterators. That count is published as
    ``lines_of_text_in_directory``.

    Returns:
        Response: the rendered ``"metrics"`` template with mimetype
        ``text/plain``.
    """
    print("Recieved metrics request...")
    metric_prefix = "hdfs_directory_stats"
    metric_values = {"the_number_one": "1"}
    target_dir = sys.argv[1]
    c = Client("namenode", 8020)
    # Build a real list: cat() is handed a list everywhere else, and map()
    # would only produce an iterator on Python 3.
    filepaths = [entry['path'] for entry in c.ls([target_dir])]
    # NOTE(review): this counts whatever units cat() yields per file; confirm
    # they are lines and not larger chunks.
    # sum() returns 0 when nothing is yielded, whereas reduce() without an
    # initial value raised TypeError. An empty listing skips cat() entirely.
    lines = sum(1 for f in c.cat(filepaths) for _ in f) if filepaths else 0
    metric_values['lines_of_text_in_directory'] = lines
    template_kwargs = {
        'metrics': metric_values,
        'dir': target_dir,
        'metric_prefix': metric_prefix
    }
    return Response(render_template("metrics", **template_kwargs),
                    mimetype='text/plain')
Exemplo n.º 3
0
class HdfsReader:
    """
    HdfsReader class

    Connects to an hdfs endpoint (namenode) and checks argo profile files stored there
    Uses a specific base path for determining argo file destinations
    """

    # File-name template per profile type: {0} is the tenant, {1} the report.
    _PROFILE_TEMPLATES = {
        'operations': '{0}_ops.json',
        'aggregations': '{0}_{1}_ap.json',
        'reports': '{0}_{1}_cfg.json',
        'thresholds': '{0}_{1}_thresholds.json',
        'recomputations': 'recomp.json',
    }

    def __init__(self, namenode, port, base_path):
        """
        Initialized HdfsReader which is used to check/read profile files from hdfs
        Args:
            namenode: str. hdfs namenode host
            port: int. hdfs namenode port
            base_path: str. base path to destination used for argo; every
                "{{tenant}}" placeholder in it is replaced by the tenant name
        """
        self.client = Client(namenode, port)
        self.base_path = base_path

    def gen_profile_path(self, tenant, report, profile_type):
        """
        Generates a valid hdfs path to a specific profile
        Args:
            tenant: str. tenant to be used
            report: str. report to be used
            profile_type: str. profile_type
                (operations|reports|aggregations|thresholds|recomputations)

        Returns:
            str: hdfs path

        Raises:
            KeyError: if profile_type is not one of the known types
        """
        sync_path = self.base_path.replace("{{tenant}}", tenant)
        filename = self._PROFILE_TEMPLATES[profile_type].format(tenant, report)
        return os.path.join(sync_path, filename)

    def cat(self, tenant, report, profile_type):
        """
        Returns the contents of a profile stored in hdfs
        Args:
            tenant: str. tenant name
            report: str. report name
            profile_type: str. profile type
                (operations|reports|aggregations|thresholds|recomputations)

        Returns:
            tuple: (parsed json content, True) when the profile was read,
            (None, False) when the file does not exist in hdfs
        """
        path = self.gen_profile_path(tenant, report, profile_type)
        try:
            # client.cat yields one iterator of content chunks per path.
            # Builtin next() works on Python 2.6+ and 3, unlike .next().
            chunks = next(self.client.cat([path]))
            first = next(chunks)
            # Join any remaining chunks so a profile delivered in several
            # pieces is parsed whole; first[:0] is an empty value of the
            # chunk's own type (str or bytes).
            payload = first + first[:0].join(chunks)
            return json.loads(payload), True
        except FileNotFoundException:
            return None, False

    def rem(self, tenant, report, profile_type):
        """
        Removes a profile file that already exists in hdfs (in order to be replaced)
        Args:
            tenant: str. tenant name
            report: str. report name
            profile_type: str. profile type
                (operations|reports|aggregations|thresholds|recomputations)

        Returns:
            bool: True when the delete request went through, False when the
            file does not exist in hdfs
        """
        path = self.gen_profile_path(tenant, report, profile_type)

        try:
            # delete() returns an iterator; pulling the first result is what
            # actually drives the request.
            next(self.client.delete([path]))
            return True
        except FileNotFoundException:
            return False
Exemplo n.º 4
0
def _tally_ngram_lines(lines, result_buffer, counts):
    """Buffer every line and count it by n-gram order.

    A line with 3..7 whitespace-separated fields is counted as a 1..5-gram
    (order = fields - 2); any other line is buffered but not counted.

    Args:
        lines: iterable of text lines.
        result_buffer: list that receives every line, in input order.
        counts: five-element list; counts[k] is the number of (k+1)-grams.
    """
    for line in lines:
        result_buffer.append(line)
        order = len(line.split()) - 2
        if 1 <= order <= 5:
            counts[order - 1] += 1


def main(opts, args):
    """Read n-gram lines, then print a ``\\data\\`` ... ``\\end\\`` listing.

    Input comes from HDFS (``*.txt`` under ``opts.hdfs``, on ``opts.host`` or
    ``HADOOP_HOST``, port 8020) when ``opts.hdfs`` is set, otherwise from the
    file named by ``sys.argv[1]`` or from stdin.

    Args:
        opts: parsed options; ``opts.hdfs`` and ``opts.host`` are read.
        args: positional arguments; only their count is inspected.
    """
    hadoop_host = HADOOP_HOST
    hadoop_user_dir = None
    if opts.hdfs:
        print("hdfs enter")
        if opts.host:
            hadoop_host = opts.host
        hadoop_user_dir = opts.hdfs

    counts = [0] * 5  # counts[k] holds the number of (k+1)-gram lines
    result_buffer = []
    if not hadoop_user_dir:
        # NOTE(review): the guard inspects `args` but the path is taken from
        # sys.argv[1]; confirm which of the two is intended.
        if len(args) > 2:
            # Open the named file: iterating the path string itself would
            # walk its characters, not the file's lines.
            with open(sys.argv[1]) as source_input:
                _tally_ngram_lines(source_input, result_buffer, counts)
        else:
            _tally_ngram_lines(sys.stdin, result_buffer, counts)
    else:
        print("connect to haddoop")
        hadoop_client = Client(hadoop_host, 8020, use_trash=False)
        for g in hadoop_client.cat([os.path.join(hadoop_user_dir, "*.txt")]):
            _tally_ngram_lines(g, result_buffer, counts)

    print('\\data\\')
    for order, count in enumerate(counts, 1):
        if count != 0:
            print("ngram %d=%s" % (order, count))

    # NOTE(review): 4- and 5-grams are counted in the header above but only
    # orders 1-3 are passed to print_ngram; confirm this is intended.
    result_iter = iter(result_buffer)
    for order in (1, 2, 3):
        # print("") emits the same single newline as a bare Python 2 `print`
        # and also works on Python 3.
        print("")
        print_ngram(result_iter, order, counts[order - 1])
    print("")
    print("\\end\\")
Exemplo n.º 5
0
#!/usr/bin/env python

from snakebite.client import Client
import time
import os

# Namenode address and port used for the module-level snakebite client.
host = '100.127.13.16'
port = 8020
# Shared client used by getInf() and the driver loop below; created with use_trash=False.
client = Client(host, port, use_trash=False)

# HDFS path passed to getInf(); its entries are listed and the directories among them scanned.
path = '/data/landing/mobileye/20180604T202528Z/BTYS7524024D1P9DGN/0197/20180406_Run2_ES8VB21_301127_Day_Rainy_CA_AEB_Collection_'


def getInf(path):
    """Yield the ``EMP.inf`` path inside each directory listed under *path*.

    Entries returned by ``client.ls([path])`` are visited in ascending order
    of their ``'path'`` value; entries whose ``'file_type'`` is not ``'d'``
    are skipped.
    """
    entries = sorted(client.ls([path]), key=lambda entry: entry['path'])
    for entry in entries:
        if entry['file_type'] != 'd':
            continue
        yield os.path.join(entry['path'], 'EMP.inf')


# Dump every EMP.inf found under `path`: first its HDFS path, then each piece
# of content that cat produces for it.
for inf_path in getInf(path):
    print(inf_path)
    for stream in client.cat([inf_path]):
        for piece in stream:
            print(piece)
Exemplo n.º 6
0
#    hdfs_host='100.127.6.35'
    # Namenode host/port in use; the commented-out lines hold a different host/port pair.
    hdfs_host='100.127.13.16'
#    hdfs_port=9820
    hdfs_port=8020

    # HDFS client created with use_trash=False and an explicit effective user.
    client = Client(host=hdfs_host, port= hdfs_port, use_trash=False, effective_user='******')

    # A directory path is required as the first argument; without it the usage
    # text is printed and the script exits (with status 0).
    if len(sys.argv) < 2:
        print 'inf_verification.py path'
        sys.exit(0)

    input_dir=sys.argv[1]

    # Collect the paths of all directory entries listed under input_dir.
    input_files=[]
    for clip in client.ls([input_dir]):
        if clip['file_type'] == 'd':
            input_files.append(clip['path'])

    # Visit the folders in sorted order and report one row per content chunk.
    for folder in sorted(input_files):
        # getInf is defined outside this fragment; presumably it returns the
        # HDFS path of the folder's .inf file -- TODO confirm.
        for inf in client.cat([getInf(folder)]):
            for content in inf:
                # start/end are reset for every chunk; the last matching line in the chunk wins.
                start=None
                end=None
                for aline in content.split('\n'):
                    if aline.startswith('startTime'):
                        start=aline.strip()
                    elif aline.startswith('endTime'):
                        end=aline.strip()

                # Tab-separated row: folder base name, startTime line, endTime line (None if absent).
                print '{}\t{}\t{}'.format(os.path.basename(folder),start, end)
Exemplo n.º 7
0
#!/usr/bin/env python

from snakebite.client import Client
import time
import os

# Namenode address and port used for the module-level snakebite client.
host='100.127.13.16'
port=8020
# Shared client used by getInf() and the driver loop below; created with use_trash=False.
client = Client(host, port, use_trash=False)

# HDFS path passed to getInf(); its entries are listed and the directories among them scanned.
path='/data/landing/mobileye/20180604T202528Z/BTYS7524024D1P9DGN/0197/20180406_Run2_ES8VB21_301127_Day_Rainy_CA_AEB_Collection_'


def getInf(path):
    """Generate ``<dir>/EMP.inf`` for every directory entry found at *path*.

    The listing from ``client.ls([path])`` is sorted by each entry's
    ``'path'`` before it is walked; only entries with ``'file_type'`` equal
    to ``'d'`` produce a value.
    """
    listing = list(client.ls([path]))
    listing.sort(key=lambda item: item['path'])

    for item in listing:
        if item['file_type'] == 'd':
            yield os.path.join(item['path'], 'EMP.inf')


# For each EMP.inf path produced by getInf, print the path and then every
# piece of content cat returns for it.
for inf_file in getInf(path):
    print(inf_file)
    for chunks in client.cat([inf_file]):
        for chunk in chunks:
            print(chunk)