Example #1
import sys

# Project-local helpers assumed in scope: HadoopJobClass, analyze_job,
# parsenamevalue, and the tasks module.
def parsefile(jfile):
	lno = 0
	try:
		jf = open(jfile, 'r')
	except IOError:
		print >> sys.stderr, "Unexpected error opening", jfile
		sys.exit(1)  # exit nonzero on failure

	job = HadoopJobClass()
	for line in jf:
		lno += 1
		# Each record starts with its type: Job, MapAttempt, ReduceAttempt, or Task.
		tasktype = line.split(" ", 1)
		if tasktype[0] == "Job":
			analyze_job(job, tasktype[1])
		elif tasktype[0] == "MapAttempt" or tasktype[0] == "ReduceAttempt":
			result = parsenamevalue(tasktype[1])
			if "TASK_ATTEMPT_ID" in result:
				idx = result["TASK_ATTEMPT_ID"]
				# Reuse the task for this attempt id if we have seen it before.
				t = job.get_task(idx)
				if t is None:
					t = tasks.taskclass()
					t.set_taskid(idx)
					job.add_task(t)
				tasks.analyze_task(tasktype[0], result, t)
		elif tasktype[0] == "Task":
			pass
		else:
			print >> sys.stderr, "Unknown Task", tasktype[0], " line number =", lno
	jf.close()
	return job
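
The examples call a parsenamevalue() helper that is not shown in the listing. A minimal sketch, assuming the Hadoop 1.x job-history record format, where the remainder of each line is a run of space-separated KEY="value" pairs:

import re

# Hypothetical sketch of the parsenamevalue() helper the examples rely on;
# it is not part of the original listing.
_NAMEVALUE_RE = re.compile(r'(\w+)="([^"]*)"')

def parsenamevalue(rest):
	# e.g. {'TASK_ATTEMPT_ID': 'attempt_..._m_000000_0', ...}
	return dict(_NAMEVALUE_RE.findall(rest))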
Example #2
import sys
import time
import datetime

# Assumes module-level context from the parser script: the global `task`
# list, the `tasks` module, HadoopJobClass, analyze_job, parsenamevalue,
# print_fs_stats, print_tasks, and the `verbose` flag.
def parsefile(jfile):
    lno = 0
    try:
        jf = open(jfile, "r")
    except IOError:
        print >> sys.stderr, "Unexpected error opening", jfile
        sys.exit(1)  # exit nonzero on failure

    job = HadoopJobClass()
    for line in jf:
        lno += 1
        tasktype = line.split(" ", 1)
        if tasktype[0] == "Job":
            analyze_job(job, tasktype[1])
        elif tasktype[0] == "MapAttempt" or tasktype[0] == "ReduceAttempt":
            result = parsenamevalue(tasktype[1])
            if "TASK_ATTEMPT_ID" in result:
                idx = result["TASK_ATTEMPT_ID"]
                # Find the existing task for this attempt id; break so `t`
                # is the matching task rather than the last one scanned.
                found = 0
                for t in task:
                    if idx in t.tid:
                        found = 1
                        break
                if found == 0:
                    t = tasks.taskclass()
                    task.append(t)
                    t.set_taskid(idx)
                # Update counters on the (possibly new) task object.
                tasks.analyze_task(tasktype[0], result, t)
        elif tasktype[0] == "Task":
            pass
        else:
            print >> sys.stderr, "Unknown Task", tasktype[0], " line number =", lno

    stime = job.get_stime()
    etime = job.get_etime()
    status = job.get_status()

    # stime/etime are epoch milliseconds; convert to local time and take
    # the wall-clock duration of the job.
    st = time.localtime(stime / 1000)
    et = time.localtime(etime / 1000)
    diff = datetime.datetime(et[0], et[1], et[2], et[3], et[4], et[5]) - datetime.datetime(
        st[0], st[1], st[2], st[3], st[4], st[5]
    )
    smaps = job.get_fin_mapjobs()
    fmaps = job.get_fail_mapjobs()
    sreds = job.get_fin_redjobs()
    freds = job.get_fail_redjobs()
    # Aggregate per-task filesystem counters, split by map vs. reduce.
    mhdfs_read, mhdfs_write, mfs_read, mfs_write, rhdfs_read, rhdfs_write, rfs_read, rfs_write = print_fs_stats(task)
    print "Status =", status

    if status == "SUCCESS":
        print "Log File                        =", jfile
        print "Job ID                          =", job.get_jobid()
        print "Job Status                      =", status
        print "Start Time                      =", datetime.datetime(st[0], st[1], st[2], st[3], st[4], st[5]).strftime(
            "%Y/%m/%d-%H:%M:%S"
        ), "(", stime, ")"
        print "End  Time                       =", datetime.datetime(et[0], et[1], et[2], et[3], et[4], et[5]).strftime(
            "%Y/%m/%d-%H:%M:%S"
        ), "(", etime, ")"
        print "Time taken                      =", diff
        print "Completed Maps                  =", smaps
        print "Completed Reduces               =", sreds
        print "Failed Maps                     =", fmaps
        print "Failed Reduces                  =", freds
        print "HDFS bytes read(Map,Reduce,Total)    =", mhdfs_read, rhdfs_read, job.get_hdfs_read()
        print "HDFS bytes Written(Map,reduce,total) =", mhdfs_write, rhdfs_write, job.get_hdfs_write()
        print "FS bytes read(map,reduce,total)      =", mfs_read, rfs_read, job.get_fs_read()
        print "FS bytes Written(Map,Reduce,Total)   =", mfs_write, rfs_write, job.get_fs_write()
    if verbose == 1:
        print_tasks(task)
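
The time.localtime()/datetime() round-trip used above can be written more directly. A minimal sketch (the helper name job_duration is ours, not the script's); fromtimestamp() applies the same local-time interpretation, and integer division to seconds matches the original's precision:

import datetime

# Job-history timestamps are epoch milliseconds.
def job_duration(stime_ms, etime_ms):
    st = datetime.datetime.fromtimestamp(stime_ms / 1000)
    et = datetime.datetime.fromtimestamp(etime_ms / 1000)
    return et - st  # a datetime.timedelta, e.g. 0:05:42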
Example #3
import sys
import time
import datetime
from subprocess import Popen, PIPE, STDOUT

# Assumes module-level context from the parser script: the global `task`
# list, the `tasks` module, HadoopJobClass, analyze_job, parsenamevalue,
# print_fs_stats, print_tasks, the get_*time* helpers used below, and the
# `verbose` and `print_header` globals.
def parsefile(jfile, cfile, print_detailed, print_summary):
	lno = 0
	try:
		jf = open(jfile, 'r')
	except IOError:
		print >> sys.stderr, "Unexpected error opening", jfile
		sys.exit(1)  # exit nonzero on failure

	# Extract mapred.job.name from the job's XML config file.
	command = "/bin/cat " + cfile + " | /bin/grep mapred.job.name | /usr/bin/xml_grep value --text_only"
	p = Popen(command, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
	mapred_job_name = p.stdout.read()


	job = HadoopJobClass()
	for line in jf:
		lno += 1
		tasktype = line.split(" ", 1)
		if tasktype[0] == "Job":
			analyze_job(job, tasktype[1])
		elif tasktype[0] == "MapAttempt" or tasktype[0] == "ReduceAttempt":
			result = parsenamevalue(tasktype[1])
			if "TASK_ATTEMPT_ID" in result:
				idx = result["TASK_ATTEMPT_ID"]
				# Find the existing task for this attempt id; break so `t`
				# is the matching task rather than the last one scanned.
				found = 0
				for t in task:
					if idx in t.tid:
						found = 1
						break
				if found == 0:
					t = tasks.taskclass()
					task.append(t)
					t.set_taskid(idx)
				tasks.analyze_task(tasktype[0], result, t)
		elif tasktype[0] == "Task":
			pass
		else:
			pass  # silently ignore unknown record types
	
	stime  = job.get_stime()
	etime  = job.get_etime()
	status = job.get_status()

	# stime/etime are epoch milliseconds; convert to local time and take
	# the wall-clock duration of the job.
	st = time.localtime(stime / 1000)
	et = time.localtime(etime / 1000)
	diff = datetime.datetime(et[0], et[1], et[2], et[3], et[4], et[5]) - datetime.datetime(st[0], st[1], st[2], st[3], st[4], st[5])
	smaps = job.get_fin_mapjobs()
	fmaps = job.get_fail_mapjobs()
	sreds = job.get_fin_redjobs()
	freds = job.get_fail_redjobs()
	# Aggregate per-task filesystem counters, split by map vs. reduce.
	mhdfs_read, mhdfs_write, mfs_read, mfs_write, rhdfs_read, rhdfs_write, rfs_read, rfs_write = print_fs_stats(task)

	# Build one CSV row (header + values); every field after the first
	# carries its own leading comma, so ''.join() emits the full line.
	header = []
	values = []

	header.append("Status")
	values.append(status)

	if status == "SUCCESS" and print_summary == "1":
		header.append(",mapred.job.name")
		values.append("," + str(mapred_job_name).rstrip('\n'))
		header.append(",Log File")
		values.append("," + jfile)
		header.append(",Job ID")
		values.append("," + job.get_jobid())
		header.append(",Start Time")
		values.append("," + datetime.datetime(st[0], st[1], st[2], st[3], st[4], st[5]).strftime("%m/%d/%Y %H:%M:%S"))
		header.append(",End Time")
		values.append("," + datetime.datetime(et[0], et[1], et[2], et[3], et[4], et[5]).strftime("%m/%d/%Y %H:%M:%S"))
		header.append(",Time taken (Seconds)")
		values.append("," + str(diff.seconds + diff.days * 24 * 3600))
		header.append(",Map Task Time (Seconds)")
		values.append("," + str(get_totaltask_time(task, "MAP")))
		header.append(",Reduce Task Time (Seconds)")
		values.append("," + str(get_totaltask_time(task, "REDUCE")))
		header.append(",Completed Maps")
		values.append("," + smaps)
		header.append(",Completed Reduces")
		values.append("," + sreds)
		header.append(",Failed Maps")
		values.append("," + fmaps)
		header.append(",Failed Reduces")
		values.append("," + freds)
		header.append(",HDFS bytes read(Map)")
		values.append("," + str(mhdfs_read))
		header.append(",HDFS bytes read(Reduce)")
		values.append("," + str(rhdfs_read))
		header.append(",HDFS bytes read(Total)")
		values.append("," + str(job.get_hdfs_read()))
		header.append(",HDFS bytes written(Map)")
		values.append("," + str(mhdfs_write))
		header.append(",HDFS bytes written(Reduce)")
		values.append("," + str(rhdfs_write))
		header.append(",HDFS bytes written(Total)")
		values.append("," + str(job.get_hdfs_write()))
		header.append(",FS bytes read(Map)")
		values.append("," + str(mfs_read))
		header.append(",FS bytes read(Reduce)")
		values.append("," + str(rfs_read))
		header.append(",FS bytes read(Total)")
		values.append("," + str(job.get_fs_read()))
		header.append(",FS bytes written(Map)")
		values.append("," + str(mfs_write))
		header.append(",FS bytes written(Reduce)")
		values.append("," + str(rfs_write))
		header.append(",FS bytes written(Total)")
		values.append("," + str(job.get_fs_write()))

		header.append(",MAP_START")
		values.append("," + str(get_starttime_string(task, "MAP")))
		header.append(",MAP_END")
		values.append("," + str(get_endtime_string(task, "MAP")))

		header.append(",REDUCE_START")
		values.append("," + str(get_starttime_string(task, "REDUCE")))
		header.append(",REDUCE_END")
		values.append("," + str(get_endtime_string(task, "REDUCE")))

		header.append(",MAP_TIME_CLOCK(s)")
		values.append("," + str(get_deltatimes(task, "MAP")))
		header.append(",REDUCE_TIME_CLOCK(s)")
		values.append("," + str(get_deltatimes(task, "REDUCE")))

		if print_header == "1":
			print ''.join(header)
		print ''.join(values)

	jobid = job.get_jobid()
	jobname = str(mapred_job_name).rstrip('\n')
	# Disabled duplicate of the human-readable report; "and 0" keeps it off.
	if status == "SUCCESS" and 0:
		print "Log File                        =",jfile
		print "Job ID                          =",job.get_jobid()
		print "Job Status                      =",status
		print "Start Time                      =",datetime.datetime(st[0],st[1],st[2],st[3],st[4],st[5]).strftime("%Y/%m/%d-%H:%M:%S"),"(",stime,")"
		print "End  Time                       =",datetime.datetime(et[0],et[1],et[2],et[3],et[4],et[5]).strftime("%Y/%m/%d-%H:%M:%S"),"(",etime,")"
		print "Time taken                      =",diff
		print "Completed Maps                  =",smaps
		print "Completed Reduces               =",sreds
		print "Failed Maps                     =",fmaps
		print "Failed Reduces                  =",freds
		print "HDFS bytes read(Map,Reduce,Total)    =",mhdfs_read,rhdfs_read,job.get_hdfs_read()
		print "HDFS bytes Written(Map,reduce,total) =",mhdfs_write,rhdfs_write,job.get_hdfs_write()
		print "FS bytes read(map,reduce,total)      =",mfs_read,rfs_read,job.get_fs_read()
		print "FS bytes Written(Map,Reduce,Total)   =",mfs_write,rfs_write,job.get_fs_write()
	if verbose == 1 and print_detailed == "1":
		print_tasks(task, jobid, jobname)
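
A hedged usage sketch for Example #3: the function prints one CSV row per job, so a minimal driver might look like the following. The file names, flag values, and global initializations are placeholders, not part of the original listing.

if __name__ == "__main__":
	task = []          # module-level task list the parser appends to
	verbose = 0
	print_header = "1"
	# parsefile(jfile, cfile, print_detailed, print_summary)
	parsefile("job_history.log", "job_conf.xml", "0", "1")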