示例#1
0
def retrieve_pie_date(start_time,finish_time,generate="unknow",groupby="unknow",**kwargs):
    '''Return (group, total) pairs of ``generate`` summed per ``groupby`` value.

    start_time, finish_time -- date strings like '2010-01-01'.
    generate -- name of the quantity to sum (required).
    groupby  -- name of the field to group by (required).
    kwargs   -- optional filters passed through eval_prefs(); the available
                keys can be: "JobGroup","FinalMajorStatus","User","JobType",
                "JobClass","ProcessingType","UserGroup","FinalMinorStatus",
                "Site"...

    Without kwargs the totals are read from the column family
    "new_cum_groupby_<groupby>" (groupby lower-cased), summing every column
    between (generate, start_time) and (generate, finish_time) per row.
    With kwargs the rows of 'bucket_data_cli' matching the filters, a
    bucketLength of 604800 and a startTime inside the requested range are
    aggregated instead.

    Returns a list of (group, total) tuples, or -1 (after printing "error")
    when generate or groupby is left at its "unknow" default.
    Raises ValueError (from time.strptime) if a date is not '%Y-%m-%d'.
    '''
    if generate == "unknow" or groupby == "unknow":
        # Single-argument call form prints the same text on Python 2 and 3.
        print("error")
        return -1

    # Parsed up front so that a malformed date fails on both paths, even
    # though only the filtered path below needs the numeric timestamps.
    start_timestamp = int(time.mktime(time.strptime(start_time, "%Y-%m-%d")))
    finish_timestamp = int(time.mktime(time.strptime(finish_time, "%Y-%m-%d")))

    if not kwargs:
        print("no kwargs")
        cf = pycassa.ColumnFamily(pool, "new_cum_groupby_%s" % groupby.lower())
        rows = cf.get_range(column_count=365000,
                            column_start=(generate, start_time),
                            column_finish=(generate, finish_time))
        # One pair per row key; a row without columns in range totals 0.
        return [(key, sum(columns.values())) for key, columns in rows]

    print("has kwargs")
    cf = pycassa.ColumnFamily(pool, 'bucket_data_cli')
    expr_list = [pycassa.create_index_expression(key, value)
                 for key, value in eval_prefs(**kwargs).items()]
    expr_list.append(pycassa.create_index_expression("bucketLength", 604800))
    expr_list.append(pycassa.create_index_expression("startTime", start_timestamp, pycassa.GTE))
    expr_list.append(pycassa.create_index_expression("startTime", finish_timestamp, pycassa.LTE))
    clause = pycassa.create_index_clause(expr_list, count=600000)

    totals = Counter()
    for key, columns in cf.get_indexed_slices(clause):
        totals[columns[groupby]] += columns[generate]
    # list() keeps the return type a real list on Python 3 as well.
    return list(totals.items())
示例#2
0
def generate_linegraph(start_time="",end_time="",generate="unknow",groupby="unknow",cumulative=False,**kwargs):
    '''Draw a filled line graph of ``generate`` per ``groupby`` value over time.

    start_time, end_time -- date strings like '2010-01-01'.
    generate   -- CPUTime, DiskSpace, ExecTime, InputSandBoxSize,
                  OutPutSandBoxSize, JobCount (required).
    groupby    -- site, user, processingtype, country, grid (required).
    cumulative -- default False; set it to True to plot running totals
                  (computed by make_cumulative_data).
    kwargs     -- optional filters passed through eval_prefs().

    With kwargs the data comes from an indexed query on 'bucket_data_cli'
    (bucketLength 604800, startTime inside the range, plus the filters).
    Without kwargs it is read from the column family
    "new_cum_groupby_<groupby>" (groupby lower-cased).

    The figure is saved via pylab.savefig('linegraph') and also rendered as
    PNG into a cStringIO.StringIO buffer, which is returned rewound to
    position 0. Returns -1 (after printing "error") when generate or groupby
    is left at its "unknow" default.
    Raises ValueError (from time.strptime) if a date is not '%Y-%m-%d'.
    '''
    if generate == "unknow" or groupby == "unknow":
        # Single-argument call form prints the same text on Python 2 and 3.
        print("error")
        return -1

    start_timestamp = int(time.mktime(time.strptime(start_time, "%Y-%m-%d")))
    end_timestamp = int(time.mktime(time.strptime(end_time, "%Y-%m-%d")))

    # Reference point for the "Processing time" label on the x axis.
    start = time.time()
    if kwargs:
        # Filters were supplied, so the index-filtered query is required.
        # (The original test was inverted: it sent filtered calls to the
        # branch that ignores kwargs.)
        series = _linegraph_indexed_series(start_timestamp, end_timestamp,
                                           generate, groupby, cumulative, kwargs)
    else:
        print('no kwargs')
        groupby = groupby.lower()
        series = _linegraph_precomputed_series(start_time, end_time,
                                               generate, groupby, cumulative)

    title = '%s groupby %s from %s to %s' % (generate, groupby, start_time, end_time)
    return _render_linegraph(series, start_timestamp, end_timestamp, title, start)


def _linegraph_indexed_series(start_timestamp, end_timestamp, generate, groupby, cumulative, filters):
    '''Collect (key, points, zorder) per group from an indexed 'bucket_data_cli' query.'''
    cf = pycassa.ColumnFamily(pool, 'bucket_data_cli')
    expr_list = [
        pycassa.create_index_expression("bucketLength", 604800),
        pycassa.create_index_expression("startTime", start_timestamp, pycassa.GTE),
        pycassa.create_index_expression("startTime", end_timestamp, pycassa.LTE),
    ]
    for key, value in eval_prefs(**filters).items():
        expr_list.append(pycassa.create_index_expression(key, value))
    clause = pycassa.create_index_clause(expr_list, count=600000)

    # group value -> Counter mapping startTime to the summed quantity
    per_group = {}
    for key, columns in cf.get_indexed_slices(clause):
        per_group.setdefault(columns[groupby], Counter())[columns["startTime"]] += columns[generate]

    if cumulative:
        print("True")

    series = []
    for key, counter in per_group.items():
        points = sorted(counter.items(), key=lambda item: item[0])
        if cumulative:
            times, values = zip(*points)
            points = list(zip(times, make_cumulative_data(values)))
        # Order by this series' own peak so taller polygons sit behind
        # shorter ones. The original used the running maximum, which could
        # hide a later, smaller series behind an earlier, larger one.
        peak = max(value for _, value in points)
        series.append((key, points, -peak))
    return series


def _linegraph_precomputed_series(start_time, end_time, generate, groupby, cumulative):
    '''Collect (key, points, zorder) per row of the "new_cum_groupby_<groupby>" column family.'''
    cf = pycassa.ColumnFamily(pool, "new_cum_groupby_%s" % groupby)
    series = []
    zorder = 0.0
    for key, columns in cf.get_range(column_count=365000,
                                     column_start=(generate, start_time),
                                     column_finish=(generate, end_time)):
        if not columns:
            # A row with no columns in the requested slice has nothing to
            # plot and would otherwise raise IndexError on the first point.
            continue
        items = list(columns.items())
        # Column names are indexed with [1] to get the 'YYYY-MM-DD' date.
        times = [int(time.mktime(time.strptime(name[1], '%Y-%m-%d'))) for name, _ in items]
        values = [value for _, value in items]
        if cumulative:
            values = make_cumulative_data(values)
        series.append((key, list(zip(times, values)), zorder))
        # Each subsequent row is layered slightly further back.
        zorder -= 0.1
    return series


def _render_linegraph(series, start_timestamp, end_timestamp, title, start):
    '''Plot the collected series as filled polygons and return the PNG as a rewound buffer.'''
    fig = pylab.figure()
    try:
        ax = pylab.axes()
        labels = []
        y_max = 0.0  # stays 0.0 when there is nothing to plot
        for key, points, zorder in series:
            labels.append(key)
            y_max = max(y_max, max(value for _, value in points))
            # Close the outline down to the x axis at both ends.
            outline = [(points[0][0], 0)] + points + [(points[-1][0], 0)]
            poly = pylab.Polygon(outline, fill=True, facecolor=generate_color(str(key)),
                                 linewidth=.2, zorder=zorder)
            ax.add_patch(poly)

        ax.set_xlim(start_timestamp, end_timestamp+1)
        ax.set_ylim(0, y_max+1)
        ax.set_xticklabels([time.strftime('%y-%m-%d', time.localtime(int(tick)))
                            for tick in ax.get_xticks()])

        # set the legend
        fontP = FontProperties()  # set legend size
        fontP.set_size('xx-small')
        ax.legend(labels, loc=0, bbox_to_anchor=(1, 1.05), prop=fontP)

        pylab.title(title)
        pylab.xlabel('Processing time is: %.5ss' % (time.time()-start))
        pylab.savefig('linegraph')
        imgData = cStringIO.StringIO()
        pylab.savefig(imgData, format='png')
        imgData.seek(0)
        return imgData
    finally:
        # Always release the figure, including when plotting or saving fails.
        pylab.close(fig)