def site_size_parser(sql_results, globals=globals(), **kw):
    """
    Take in CPU hours information (# of wall hours per timespan per site)
    and convert it into the "size" of the site as a function of time.

    The SQL results are parsed with ``results_parser`` into a mapping of
    ``site -> {interval start -> wall hours}``.  Each value is divided by
    the interval length in hours (``kw['span']`` is divided by 3600) to get
    an average CPU count, which is fed to an ``NMax(2)`` tracker; the value
    stored for the interval is whatever that tracker's ``get_max()`` reports.

    Every site gets an entry for every interval seen for any site; an
    interval with no data for a site is treated as zero usage.

    Arguments:
      sql_results -- raw query results, handed to ``results_parser``.
      globals     -- namespace handed to ``results_parser``.
      kw          -- extra options; ``span`` is required.

    Returns the tuple ``(new_results, md)`` where ``md`` is the metadata
    returned by ``results_parser``.  Raises ``KeyError`` if ``span`` is
    missing from the keyword arguments.
    """
    results, md = results_parser(sql_results, globals=globals, **kw)
    hours = kw['span'] / 3600.

    # Union of the interval starts of all sites.  The builtin ``set``
    # replaces the deprecated ``sets.Set`` the code used to rely on.
    all_intervals = set()
    for usage in results.values():
        all_intervals.update(usage.keys())
    all_intervals = sorted(all_intervals)

    new_results = {}
    for site, usage in results.items():
        # One tracker per site, fed in chronological order.
        mymax = NMax(2)
        site_sizes = new_results[site] = {}
        for start in all_intervals:
            mymax.add_datum(usage.get(start, 0) / hours)
            site_sizes[start] = mymax.get_max()
    return new_results, md
示例#2
0
def site_size_parser(sql_results, globals=globals(), **kw):
    """
    Take in CPU hours information (# of wall hours per timespan per site)
    and convert it into the "size" of the site as a function of time.

    The SQL results are parsed with ``results_parser`` into a mapping of
    ``site -> {interval start -> wall hours}``.  Each value is divided by
    the interval length in hours (``kw['span']`` is divided by 3600) to get
    an average CPU count, which is fed to an ``NMax(2)`` tracker; the value
    stored for the interval is whatever that tracker's ``get_max()`` reports.

    Every site gets an entry for every interval seen for any site; an
    interval with no data for a site is treated as zero usage.

    Arguments:
      sql_results -- raw query results, handed to ``results_parser``.
      globals     -- namespace handed to ``results_parser``.
      kw          -- extra options; ``span`` is required.

    Returns the tuple ``(new_results, md)`` where ``md`` is the metadata
    returned by ``results_parser``.  Raises ``KeyError`` if ``span`` is
    missing from the keyword arguments.
    """
    results, md = results_parser(sql_results, globals=globals, **kw)
    hours = kw['span'] / 3600.

    # Union of the interval starts of all sites.  The builtin ``set``
    # replaces the deprecated ``sets.Set`` the code used to rely on.
    all_intervals = set()
    for usage in results.values():
        all_intervals.update(usage.keys())
    all_intervals = sorted(all_intervals)

    new_results = {}
    for site, usage in results.items():
        # One tracker per site, fed in chronological order.
        mymax = NMax(2)
        site_sizes = new_results[site] = {}
        for start in all_intervals:
            mymax.add_datum(usage.get(start, 0) / hours)
            site_sizes[start] = mymax.get_max()
    return new_results, md
示例#3
0
def non_physics_filter(sql_results, globals=globals(), **kw):
    """
    Drop every pivot that ``physics_classifier`` reports as a physics VO.

    The SQL results are parsed with ``results_parser``; the pivots of the
    parsed results are handed to ``physics_classifier`` and any pivot found
    in its answer is left out.  Returns ``(filtered_results, md)`` where
    ``md`` is the metadata from ``results_parser``.
    """
    parsed, metadata = results_parser(sql_results, globals=globals, **kw)
    physics_vos = physics_classifier(parsed.keys(), globals=globals)
    kept = dict(
        (vo, group) for vo, group in parsed.items() if vo not in physics_vos
    )
    return kept, metadata
def non_physics_filter(sql_results, globals=globals(), **kw):
    """
    Drop every pivot that ``physics_classifier`` reports as a physics VO.

    The SQL results are parsed with ``results_parser``; the pivots of the
    parsed results are handed to ``physics_classifier`` and any pivot found
    in its answer is left out.  Returns ``(filtered_results, md)`` where
    ``md`` is the metadata from ``results_parser``.
    """
    parsed, metadata = results_parser(sql_results, globals=globals, **kw)
    physics_vos = physics_classifier(parsed.keys(), globals=globals)
    kept = dict(
        (vo, group) for vo, group in parsed.items() if vo not in physics_vos
    )
    return kept, metadata
示例#5
0
def science_classifier(sql_results, globals=globals(), default="Other", **kw):
    """
    Take in some VO-based metric and convert it to a field of science-based
    metric.  Uses the fact that the field of science is recorded by OIM.

    Steps:
      1. Parse the SQL results into ``VO -> {group -> value}``.
      2. Fetch ``(OIM VO, science field)`` pairs from
         ``globals['RSVQueries'].field_of_science()`` and append the
         module-level ``addl_fields_of_science`` entries.
      3. For each OIM VO keep the science field with the lowest number in
         the module-level ``precedence`` table.
      4. Re-key every VO's groups by its science field (``default`` when the
         VO cannot be mapped), adding values of VOs that share a field.

    The label 'HEP' is reported as 'High Energy Physics' and 'Physics' as
    'non-HEP Physics'.

    Arguments:
      sql_results -- raw query results, handed to ``results_parser``.
      globals     -- namespace providing ``RSVQueries``; also handed to
                     ``results_parser``.
      default     -- label used for VOs with no known field of science.
      kw          -- extra options handed to ``results_parser``.

    Returns ``(filtered_results, md)`` where ``md`` is the metadata from
    ``results_parser``.
    """
    results, md = results_parser(sql_results, globals=globals, **kw)
    oim_fields, _ = globals['RSVQueries'].field_of_science()
    # Build a new list rather than using ``+=``, which would extend the list
    # object handed back by the query layer in place.
    fields_of_science = list(oim_fields) + list(addl_fields_of_science)
    gratia_vos = results.keys()
    oim_vos = [entry[0] for entry in fields_of_science]
    _, gratia_to_oim = OIM_to_gratia_mapper(oim_vos, gratia_vos)

    vo_to_science = {}
    for oim_vo, science_field in fields_of_science:
        current_science = vo_to_science.get(oim_vo, '')
        precedence_cur = precedence.get(current_science, 99)
        precedence_new = precedence.get(science_field)
        if precedence_new is None:
            # A field missing from the precedence table used to compare as
            # None, which on Python 2 sorts below every number (so it
            # displaced ranked fields) and is a TypeError on Python 3.
            # Treat it as lowest priority: only use it if nothing is set.
            if oim_vo not in vo_to_science:
                vo_to_science[oim_vo] = science_field
        elif precedence_new < precedence_cur:
            vo_to_science[oim_vo] = science_field

    filtered_results = {}
    for pivot, groups in results.items():
        if pivot in gratia_to_oim and gratia_to_oim[pivot] in vo_to_science:
            new_pivot = vo_to_science[gratia_to_oim[pivot]]
        else:
            new_pivot = default
        if new_pivot == 'HEP':
            new_pivot = 'High Energy Physics'
        if new_pivot not in filtered_results:
            # Copy so that later merges do not modify the parsed input.
            filtered_results[new_pivot] = dict(groups)
        else:
            merged = filtered_results[new_pivot]
            for group, val in groups.items():
                merged[group] = merged.get(group, 0) + val
    if 'Physics' in filtered_results:
        filtered_results['non-HEP Physics'] = filtered_results.pop('Physics')
    return filtered_results, md
def science_classifier(sql_results, globals=globals(), default="Other", **kw):
    """
    Take in some VO-based metric and convert it to a field of science-based
    metric.  Uses the fact that the field of science is recorded by OIM.

    Steps:
      1. Parse the SQL results into ``VO -> {group -> value}``.
      2. Fetch ``(OIM VO, science field)`` pairs from
         ``globals['RSVQueries'].field_of_science()`` and append the
         module-level ``addl_fields_of_science`` entries.
      3. For each OIM VO keep the science field with the lowest number in
         the module-level ``precedence`` table.
      4. Re-key every VO's groups by its science field (``default`` when the
         VO cannot be mapped), adding values of VOs that share a field.

    The label 'HEP' is reported as 'High Energy Physics' and 'Physics' as
    'non-HEP Physics'.

    Arguments:
      sql_results -- raw query results, handed to ``results_parser``.
      globals     -- namespace providing ``RSVQueries``; also handed to
                     ``results_parser``.
      default     -- label used for VOs with no known field of science.
      kw          -- extra options handed to ``results_parser``.

    Returns ``(filtered_results, md)`` where ``md`` is the metadata from
    ``results_parser``.
    """
    results, md = results_parser(sql_results, globals=globals, **kw)
    oim_fields, _ = globals['RSVQueries'].field_of_science()
    # Build a new list rather than using ``+=``, which would extend the list
    # object handed back by the query layer in place.
    fields_of_science = list(oim_fields) + list(addl_fields_of_science)
    gratia_vos = results.keys()
    oim_vos = [entry[0] for entry in fields_of_science]
    _, gratia_to_oim = OIM_to_gratia_mapper(oim_vos, gratia_vos)

    vo_to_science = {}
    for oim_vo, science_field in fields_of_science:
        current_science = vo_to_science.get(oim_vo, '')
        precedence_cur = precedence.get(current_science, 99)
        precedence_new = precedence.get(science_field)
        if precedence_new is None:
            # A field missing from the precedence table used to compare as
            # None, which on Python 2 sorts below every number (so it
            # displaced ranked fields) and is a TypeError on Python 3.
            # Treat it as lowest priority: only use it if nothing is set.
            if oim_vo not in vo_to_science:
                vo_to_science[oim_vo] = science_field
        elif precedence_new < precedence_cur:
            vo_to_science[oim_vo] = science_field

    filtered_results = {}
    for pivot, groups in results.items():
        if pivot in gratia_to_oim and gratia_to_oim[pivot] in vo_to_science:
            new_pivot = vo_to_science[gratia_to_oim[pivot]]
        else:
            new_pivot = default
        if new_pivot == 'HEP':
            new_pivot = 'High Energy Physics'
        if new_pivot not in filtered_results:
            # Copy so that later merges do not modify the parsed input.
            filtered_results[new_pivot] = dict(groups)
        else:
            merged = filtered_results[new_pivot]
            for group, val in groups.items():
                merged[group] = merged.get(group, 0) + val
    if 'Physics' in filtered_results:
        filtered_results['non-HEP Physics'] = filtered_results.pop('Physics')
    return filtered_results, md
示例#7
0
def osg_site_size(sql_results, globals=globals(), **kw):
    """
    Calculate the OSG's size in terms of utilized CPUs, accessible CPUs, and
    total CPUs.  Break down these statistics by site.

    For every site present in the parsed SQL results this reports:
      'Max Used'               -- the largest per-interval value found for
                                  the site in ``osg_avail_size``.
      'In OSG, but never used' -- the largest per-interval value found for
                                  the site in ``gip_site_size`` minus the
                                  figure above, floored at zero.

    Only the intervals present in the parsed SQL results are examined.
    When the ``normalize`` keyword contains the letter "t" (any case), each
    value is multiplied by a per-site, per-interval score taken from
    ``subcluster_score_ts`` before the maximum is taken.

    Arguments:
      sql_results -- raw query results, handed to ``results_parser``.
      globals     -- namespace providing ``GratiaBarQueries`` and
                     ``GIPQueries``; also handed to ``results_parser``.
      kw          -- extra options; ``normalize`` is read here, everything
                     is forwarded to ``results_parser``.

    Returns ``(final_results, md)`` where ``md`` is the metadata from
    ``results_parser``.
    """
    USED = 'Max Used'
    UNACCESSIBLE = 'In OSG, but never used'

    normalize = 'normalize' in kw and kw['normalize'].lower().find('t') >= 0

    utilized_results, md = results_parser(sql_results, globals=globals, **kw)
    one_week = 7 * 86400
    a_year_ago = time.time() - one_week * 52
    accessible_results, _ = globals['GratiaBarQueries'].osg_avail_size(
        span=one_week, starttime=a_year_ago)
    total_results, _ = globals['GIPQueries'].gip_site_size(
        span=one_week, starttime=a_year_ago, max_size=20000)
    ksi2k_results, _ = globals['GIPQueries'].subcluster_score_ts()
    ksi2k_results2, _ = globals['GIPQueries'].subcluster_score_ts2()
    ksi2k_results2 = ksi2k_results2['Nebraska']

    sites = utilized_results.keys()
    all_intervals = set()
    for site in sites:
        all_intervals.update(utilized_results[site].keys())
    all_intervals = sorted(all_intervals)

    # NOTE(review): ACCESSIBLE is not defined in this function; it is
    # presumably a module-level constant.  Its entry is never filled here.
    final_results = {USED: {}, ACCESSIBLE: {}, UNACCESSIBLE: {}}

    # Fallback score per interval: a running maximum of the 'Nebraska'
    # series, starting from the smaller of 1.7 and the lowest score seen.
    # (The old ``min(1.7, values)`` compared a float with a list, which is
    # always 1.7 on Python 2 and a TypeError on Python 3.)
    ksi2k_min = min([1.7] + list(ksi2k_results2.values()))
    ksi2k_max = ksi2k_min
    avg_ksi2k_results = {}
    for interval in all_intervals:
        ksi2k_max = max(ksi2k_results2.get(interval, ksi2k_min), ksi2k_max)
        avg_ksi2k_results[interval] = ksi2k_max

    def ksi2k_for(site, interval):
        # Score used to normalize one site's value for one interval.
        if site not in ksi2k_results:
            return avg_ksi2k_results[interval]
        site_scores = ksi2k_results[site]
        if interval in site_scores:
            return site_scores[interval]
        return min(min(site_scores.values()), avg_ksi2k_results[interval])

    def peak_by_site(per_site):
        # Largest (optionally normalized) value per site over all intervals.
        peaks = {}
        for site, vals in per_site.items():
            peak = 0
            for interval in all_intervals:
                size = vals.get(interval, 0)
                if normalize:
                    size = size * ksi2k_for(site, interval)
                peak = max(peak, size)
            peaks[site] = peak
        return peaks

    # NOTE: an earlier version ended its interval loop with
    # ``if interval < may_1: continue``, which had no effect; intervals
    # before May 1, 2008 are therefore still included.
    total_accessible_results = peak_by_site(accessible_results)
    total_total_results = peak_by_site(total_results)

    for site in sites:
        accessible = total_accessible_results.get(site, 0)
        final_results[USED][site] = accessible
        final_results[UNACCESSIBLE][site] = max(
            total_total_results.get(site, 0) - accessible, 0)

    return final_results, md
示例#8
0
def osg_size(sql_results, globals=globals(), **kw):
    """
    Calculate the OSG's size in terms of utilized CPUs, accessible CPUs, and
    total CPUs.

    For every interval in the parsed SQL results that does not fall before
    May 1, 2008 (local time), three totals are summed over all sites:
      used       -- from the parsed SQL results,
      accessible -- from ``GratiaBarQueries.osg_avail_size``,
      total      -- from ``GIPQueries.gip_site_size``.
    They are reported as
      'Used'                       -- used,
      'Accessible, but not Used'   -- accessible - used, floored at zero,
      'In OSG, but not Accessible' -- total - accessible, floored at zero.

    When the ``normalize`` keyword contains the letter "t" (any case), each
    site's value is multiplied by a per-site, per-interval score taken from
    ``subcluster_score_ts`` before summing.

    Arguments:
      sql_results -- raw query results, handed to ``results_parser``.
      globals     -- namespace providing ``GratiaBarQueries`` and
                     ``GIPQueries``; also handed to ``results_parser``.
      kw          -- extra options; ``normalize`` is read here, everything
                     is forwarded to ``results_parser``.

    Returns ``(final_results, md)`` where ``md`` is the metadata from
    ``results_parser``.
    """
    # Keys are kept local: the result dict used to be built from these
    # literals but indexed through names not defined in this function.
    used_key = 'Used'
    accessible_key = 'Accessible, but not Used'
    unaccessible_key = 'In OSG, but not Accessible'

    normalize = 'normalize' in kw and kw['normalize'].lower().find('t') >= 0

    utilized_results, md = results_parser(sql_results, globals=globals, **kw)
    one_week = 7 * 86400
    a_year_ago = time.time() - one_week * 52
    accessible_results, _ = globals['GratiaBarQueries'].osg_avail_size(
        span=one_week, starttime=a_year_ago)
    total_results, _ = globals['GIPQueries'].gip_site_size(
        span=one_week, starttime=a_year_ago, max_size=20000)
    ksi2k_results, _ = globals['GIPQueries'].subcluster_score_ts()
    ksi2k_results2, _ = globals['GIPQueries'].subcluster_score_ts2()
    ksi2k_results2 = ksi2k_results2['Nebraska']

    all_intervals = set()
    for site in utilized_results.keys():
        all_intervals.update(utilized_results[site].keys())
    all_intervals = sorted(all_intervals)

    final_results = {used_key: {}, accessible_key: {}, unaccessible_key: {}}
    # Plain decimal literals: ``05``/``01`` are a syntax error on Python 3.
    may_1 = time.mktime((2008, 5, 1, 0, 0, 0, 0, 0, 0))

    # Fallback score per interval: a running maximum of the 'Nebraska'
    # series, starting from the smaller of 1.7 and the lowest score seen.
    # (The old ``min(1.7, values)`` compared a float with a list, which is
    # always 1.7 on Python 2 and a TypeError on Python 3.)
    ksi2k_min = min([1.7] + list(ksi2k_results2.values()))
    ksi2k_max = ksi2k_min
    avg_ksi2k_results = {}
    for interval in all_intervals:
        ksi2k_max = max(ksi2k_results2.get(interval, ksi2k_min), ksi2k_max)
        avg_ksi2k_results[interval] = ksi2k_max

    def ksi2k_for(site, interval):
        # Score used to normalize one site's value for one interval.
        if site not in ksi2k_results:
            return avg_ksi2k_results[interval]
        site_scores = ksi2k_results[site]
        if interval in site_scores:
            return site_scores[interval]
        return min(min(site_scores.values()), avg_ksi2k_results[interval])

    def total_at(per_site, interval):
        # Sum of the (optionally normalized) values of all sites.
        total = 0
        for site, vals in per_site.items():
            size = vals.get(interval, 0)
            if normalize:
                size = size * ksi2k_for(site, interval)
            total += size
        return total

    for interval in all_intervals:
        # Intervals before the cutoff are not reported.
        if interval < may_1:
            continue
        used = total_at(utilized_results, interval)
        accessible = total_at(accessible_results, interval)
        total = total_at(total_results, interval)
        final_results[used_key][interval] = used
        final_results[accessible_key][interval] = max(accessible - used, 0)
        final_results[unaccessible_key][interval] = max(total - accessible, 0)
    return final_results, md
def osg_site_size(sql_results, globals=globals(), **kw):
    """
    Calculate the OSG's size in terms of utilized CPUs, accessible CPUs, and
    total CPUs.  Break down these statistics by site.

    For every site present in the parsed SQL results this reports:
      'Max Used'               -- the largest per-interval value found for
                                  the site in ``osg_avail_size``.
      'In OSG, but never used' -- the largest per-interval value found for
                                  the site in ``gip_site_size`` minus the
                                  figure above, floored at zero.

    Only the intervals present in the parsed SQL results are examined.
    When the ``normalize`` keyword contains the letter "t" (any case), each
    value is multiplied by a per-site, per-interval score taken from
    ``subcluster_score_ts`` before the maximum is taken.

    Arguments:
      sql_results -- raw query results, handed to ``results_parser``.
      globals     -- namespace providing ``GratiaBarQueries`` and
                     ``GIPQueries``; also handed to ``results_parser``.
      kw          -- extra options; ``normalize`` is read here, everything
                     is forwarded to ``results_parser``.

    Returns ``(final_results, md)`` where ``md`` is the metadata from
    ``results_parser``.
    """
    USED = 'Max Used'
    UNACCESSIBLE = 'In OSG, but never used'

    normalize = 'normalize' in kw and kw['normalize'].lower().find('t') >= 0

    utilized_results, md = results_parser(sql_results, globals=globals, **kw)
    one_week = 7 * 86400
    a_year_ago = time.time() - one_week * 52
    accessible_results, _ = globals['GratiaBarQueries'].osg_avail_size(
        span=one_week, starttime=a_year_ago)
    total_results, _ = globals['GIPQueries'].gip_site_size(
        span=one_week, starttime=a_year_ago, max_size=20000)
    ksi2k_results, _ = globals['GIPQueries'].subcluster_score_ts()
    ksi2k_results2, _ = globals['GIPQueries'].subcluster_score_ts2()
    ksi2k_results2 = ksi2k_results2['Nebraska']

    sites = utilized_results.keys()
    all_intervals = set()
    for site in sites:
        all_intervals.update(utilized_results[site].keys())
    all_intervals = sorted(all_intervals)

    # NOTE(review): ACCESSIBLE is not defined in this function; it is
    # presumably a module-level constant.  Its entry is never filled here.
    final_results = {USED: {}, ACCESSIBLE: {}, UNACCESSIBLE: {}}

    # Fallback score per interval: a running maximum of the 'Nebraska'
    # series, starting from the smaller of 1.7 and the lowest score seen.
    # (The old ``min(1.7, values)`` compared a float with a list, which is
    # always 1.7 on Python 2 and a TypeError on Python 3.)
    ksi2k_min = min([1.7] + list(ksi2k_results2.values()))
    ksi2k_max = ksi2k_min
    avg_ksi2k_results = {}
    for interval in all_intervals:
        ksi2k_max = max(ksi2k_results2.get(interval, ksi2k_min), ksi2k_max)
        avg_ksi2k_results[interval] = ksi2k_max

    def ksi2k_for(site, interval):
        # Score used to normalize one site's value for one interval.
        if site not in ksi2k_results:
            return avg_ksi2k_results[interval]
        site_scores = ksi2k_results[site]
        if interval in site_scores:
            return site_scores[interval]
        return min(min(site_scores.values()), avg_ksi2k_results[interval])

    def peak_by_site(per_site):
        # Largest (optionally normalized) value per site over all intervals.
        peaks = {}
        for site, vals in per_site.items():
            peak = 0
            for interval in all_intervals:
                size = vals.get(interval, 0)
                if normalize:
                    size = size * ksi2k_for(site, interval)
                peak = max(peak, size)
            peaks[site] = peak
        return peaks

    # NOTE: an earlier version ended its interval loop with
    # ``if interval < may_1: continue``, which had no effect; intervals
    # before May 1, 2008 are therefore still included.
    total_accessible_results = peak_by_site(accessible_results)
    total_total_results = peak_by_site(total_results)

    for site in sites:
        accessible = total_accessible_results.get(site, 0)
        final_results[USED][site] = accessible
        final_results[UNACCESSIBLE][site] = max(
            total_total_results.get(site, 0) - accessible, 0)

    return final_results, md
示例#10
0
def osg_size(sql_results, globals=globals(), **kw):
    """
    Calculate the OSG's size in terms of utilized CPUs, accessible CPUs, and
    total CPUs.

    For every interval in the parsed SQL results that does not fall before
    May 1, 2008 (local time), three totals are summed over all sites:
      used       -- from the parsed SQL results,
      accessible -- from ``GratiaBarQueries.osg_avail_size``,
      total      -- from ``GIPQueries.gip_site_size``.
    They are reported as
      'Used'                       -- used,
      'Accessible, but not Used'   -- accessible - used, floored at zero,
      'In OSG, but not Accessible' -- total - accessible, floored at zero.

    When the ``normalize`` keyword contains the letter "t" (any case), each
    site's value is multiplied by a per-site, per-interval score taken from
    ``subcluster_score_ts`` before summing.

    Arguments:
      sql_results -- raw query results, handed to ``results_parser``.
      globals     -- namespace providing ``GratiaBarQueries`` and
                     ``GIPQueries``; also handed to ``results_parser``.
      kw          -- extra options; ``normalize`` is read here, everything
                     is forwarded to ``results_parser``.

    Returns ``(final_results, md)`` where ``md`` is the metadata from
    ``results_parser``.
    """
    # Keys are kept local: the result dict used to be built from these
    # literals but indexed through names not defined in this function.
    used_key = 'Used'
    accessible_key = 'Accessible, but not Used'
    unaccessible_key = 'In OSG, but not Accessible'

    normalize = 'normalize' in kw and kw['normalize'].lower().find('t') >= 0

    utilized_results, md = results_parser(sql_results, globals=globals, **kw)
    one_week = 7 * 86400
    a_year_ago = time.time() - one_week * 52
    accessible_results, _ = globals['GratiaBarQueries'].osg_avail_size(
        span=one_week, starttime=a_year_ago)
    total_results, _ = globals['GIPQueries'].gip_site_size(
        span=one_week, starttime=a_year_ago, max_size=20000)
    ksi2k_results, _ = globals['GIPQueries'].subcluster_score_ts()
    ksi2k_results2, _ = globals['GIPQueries'].subcluster_score_ts2()
    ksi2k_results2 = ksi2k_results2['Nebraska']

    all_intervals = set()
    for site in utilized_results.keys():
        all_intervals.update(utilized_results[site].keys())
    all_intervals = sorted(all_intervals)

    final_results = {used_key: {}, accessible_key: {}, unaccessible_key: {}}
    # Plain decimal literals: ``05``/``01`` are a syntax error on Python 3.
    may_1 = time.mktime((2008, 5, 1, 0, 0, 0, 0, 0, 0))

    # Fallback score per interval: a running maximum of the 'Nebraska'
    # series, starting from the smaller of 1.7 and the lowest score seen.
    # (The old ``min(1.7, values)`` compared a float with a list, which is
    # always 1.7 on Python 2 and a TypeError on Python 3.)
    ksi2k_min = min([1.7] + list(ksi2k_results2.values()))
    ksi2k_max = ksi2k_min
    avg_ksi2k_results = {}
    for interval in all_intervals:
        ksi2k_max = max(ksi2k_results2.get(interval, ksi2k_min), ksi2k_max)
        avg_ksi2k_results[interval] = ksi2k_max

    def ksi2k_for(site, interval):
        # Score used to normalize one site's value for one interval.
        if site not in ksi2k_results:
            return avg_ksi2k_results[interval]
        site_scores = ksi2k_results[site]
        if interval in site_scores:
            return site_scores[interval]
        return min(min(site_scores.values()), avg_ksi2k_results[interval])

    def total_at(per_site, interval):
        # Sum of the (optionally normalized) values of all sites.
        total = 0
        for site, vals in per_site.items():
            size = vals.get(interval, 0)
            if normalize:
                size = size * ksi2k_for(site, interval)
            total += size
        return total

    for interval in all_intervals:
        # Intervals before the cutoff are not reported.
        if interval < may_1:
            continue
        used = total_at(utilized_results, interval)
        accessible = total_at(accessible_results, interval)
        total = total_at(total_results, interval)
        final_results[used_key][interval] = used
        final_results[accessible_key][interval] = max(accessible - used, 0)
        final_results[unaccessible_key][interval] = max(total - accessible, 0)
    return final_results, md