示例#1
0
def get_session_profile(sid, src=None):

    if not src:
        src = "%s/%s" % (os.getcwd(), sid)

    if os.path.exists(src):
        # we have profiles locally
        profiles = glob.glob("%s/*.prof" % src)
        profiles += glob.glob("%s/*/*.prof" % src)
    else:
        # need to fetch profiles
        from .session import fetch_profiles
        profiles = fetch_profiles(sid=sid, skip_existing=True)

    #  filter out some frequent, but uninteresting events
    efilter = {
        ru.EVENT: ['publish', 'work start', 'work done'],
        ru.MSG:
        ['update unit state', 'unit update pushed', 'bulked', 'bulk size']
    }

    profiles = ru.read_profiles(profiles, sid, efilter=efilter)
    profile, accuracy = ru.combine_profiles(profiles)
    profile = ru.clean_profile(profile, sid, rps.FINAL, rps.CANCELED)
    hostmap = get_hostmap(profile)

    if not hostmap:
        # FIXME: legacy host notation - deprecated
        hostmap = get_hostmap_deprecated(profiles)

    return profile, accuracy, hostmap
def get_session_profile(sid, src=None):

    if not src:
        src = "%s/%s" % (os.getcwd(), sid)

    if os.path.exists(src):
        # we have profiles locally
        profiles  = glob.glob("%s/*.prof"   % src)
        profiles += glob.glob("%s/*/*.prof" % src)
    else:
        # need to fetch profiles
        from .session import fetch_profiles
        profiles = fetch_profiles(sid=sid, skip_existing=True)

    #  filter out some frequent, but uninteresting events
    efilter = {ru.EVENT : ['publish', 'work start', 'work done'], 
               ru.MSG   : ['update unit state', 'unit update pushed', 
                            'bulked', 'bulk size']
              }

    profiles          = ru.read_profiles(profiles, sid, efilter=efilter)
    profile, accuracy = ru.combine_profiles(profiles)
    profile           = ru.clean_profile(profile, sid, rps.FINAL, rps.CANCELED)
    hostmap           = get_hostmap(profile)

    if not hostmap:
        # FIXME: legacy host notation - deprecated
        hostmap = get_hostmap_deprecated(profiles)

    return profile, accuracy, hostmap
示例#3
0
def get_session_profile(sid, src=None):

    if not src:
        src = "%s/%s" % (os.getcwd(), sid)

    if os.path.exists(src):
        # we have profiles locally
        profiles = glob.glob("%s/*.prof" % src)
        profiles += glob.glob("%s/*/*.prof" % src)
    else:
        # need to fetch profiles
        from .session import fetch_profiles
        profiles = fetch_profiles(sid=sid, skip_existing=True)

    #  filter out some frequent, but uninteresting events
    efilter = {
        ru.EVENT: [
            # 'get',
            'publish',
            'schedule_skip',
            'schedule_fail',
            'staging_stderr_start',
            'staging_stderr_stop',
            'staging_stdout_start',
            'staging_stdout_stop',
            'staging_uprof_start',
            'staging_uprof_stop',
            'update_pushed',
        ]
    }

    profiles = ru.read_profiles(profiles, sid, efilter=efilter)
    profile, accuracy = ru.combine_profiles(profiles)
    profile = ru.clean_profile(profile, sid, s.FINAL, s.CANCELED)
    hostmap = get_hostmap(profile)

    if not hostmap:
        # FIXME: legacy host notation - deprecated
        hostmap = get_hostmap_deprecated(profiles)

    return profile, accuracy, hostmap
示例#4
0
def get_session_profile(sid, src=None):

    if not src:
        src = os.getcwd()

    if os.path.exists(src):

        # EnTK profiles are always on localhost
        profiles  = glob.glob("%s/*.prof"   % (src))
        profiles += glob.glob("%s/*/*.prof" % (src))

    else:
        profiles  = glob.glob("./%s/*.prof"   % (sid))
        profiles += glob.glob("./%s/*/*.prof" % (sid))

    if not profiles:
        raise EnTKError('No profiles found at %s' % src)


    try:
        profiles = ru.read_profiles(profiles=profiles, sid=sid)
        prof, acc = ru.combine_profiles(profiles)
        prof = ru.clean_profile(prof,
                                sid=sid,
                                state_final=res.FINAL,
                                state_canceled=res.CANCELED)

        hostmap = get_hostmap(prof)

        if not hostmap:
            # FIXME: legacy host notation - deprecated
            hostmap = get_hostmap_deprecated(profiles)

        return prof, acc, hostmap

    except Exception as ex:

        # Push the exception raised by child functions
        print(traceback.format_exc())
        raise EnTKError('Error: %s' % ex)
def get_session_profile(sid, src=None):

    if not src:
        src = os.getcwd()

    if os.path.exists(src):

        # EnTK profiles are always on localhost
        profiles = glob.glob("%s/%s/*.prof" % (src, sid))

    else:
        raise EnTKError('%s/%s does not exist' % (src, sid))

    if not profiles:
        raise EnTKError('No profiles found at %s' % src)

    try:

        profiles = ru.read_profiles(profiles=profiles, sid=sid)
        prof, acc = ru.combine_profiles(profiles)
        prof = ru.clean_profile(prof,
                                sid=sid,
                                state_final=res.FINAL,
                                state_canceled=res.CANCELED)

        hostmap = get_hostmap(prof)

        if not hostmap:
            # FIXME: legacy host notation - deprecated
            hostmap = get_hostmap_deprecated(profiles)

        return prof, acc, hostmap

    except Exception as ex:

        # Push the exception raised by child functions
        print traceback.format_exc()
        raise EnTKError('Error: %s' % ex)
    def __init__(self, src, stype, sid=None, _entities=None, _init=True):
        '''
        Create a radical.analytics session for analysis.

        The session is created from a set of profiles, which usually have been
        produced from some other session object in the RCT stack, such as
        radical.pilot. Profiles are accepted in two forms: in a directory, or in
        a tarball (of such a directory).  In the latter case, the tarball are
        extracted into `$TMP`, and then handled just as the directory case.

        If no `sid` (session ID) is specified, that ID is derived from the
        directory name.
        '''

        if not os.path.exists(src):
            raise ValueError('src [%s] does not exist' % src)

        if os.path.isdir(src):
            pass

        elif os.path.isfile(src):

            # src is afile - we assume its a tarball and extract it
            if  src.endswith('.prof'):
                # use as is
                tgt = src

            elif src.endswith('.tgz') or \
                 src.endswith('.tbz')    :
                tgt = src[:-4]

            elif src.endswith('.tar.gz') or \
                 src.endswith('.tar.bz')    :
                tgt = src[:-7]

            elif src.endswith('.prof'):
                tgt = None

            else:
                raise ValueError('src does not look like a tarball or profile')


            if tgt and not os.path.exists(tgt):

                # need to extract
                print 'extract tarball to %s' % tgt
                try:
                    tf = tarfile.open(name=src, mode='r:bz2')
                    tf.extractall(path=os.path.dirname(tgt))

                except Exception as e:
                    raise RuntimeError('Cannot extract tarball: %s' % repr(e))

            # switch to the extracted data dir
            if tgt:
                src = tgt


        # if no sid is given, we assume its the directory name
        if not sid:
            if src.endswith('/'):
                src = src[:-1]
            sid = os.path.basename(src)

        self._sid   = sid
        self._src   = src
        self._stype = stype

      # print 'sid: %s [%s]' % (sid, stype)
      # print 'src: %s'      % src

        if stype == 'radical':

            # src is expected to point either to a single profile, or to
            # a directory tree containing profiles
            if not src:
                raise ValueError('RA session types need `src` specified')

            profiles = list()
            if os.path.isfile(src):
                profiles.append(src)
            else:
                for root, dirs, files in os.walk(src):
                    for f in files:
                        if f.endswith('.prof'):
                            profiles.append('%s/%s' % (root, f))

            profiles                = ru.read_profiles(profiles, sid=sid)
            self._profile, accuracy = ru.combine_profiles(profiles)
            self._description       = {'tree'     : dict(),
                                       'entities' : list(),
                                       'hostmap'  : dict(),
                                       'accuracy' : 0.0}


        elif stype == 'radical.pilot':

            import radical.pilot.utils as rpu
            self._profile, accuracy, hostmap \
                              = rpu.get_session_profile    (sid=sid, src=self._src)
            self._description = rpu.get_session_description(sid=sid, src=self._src)

            self._description['accuracy'] = accuracy
            self._description['hostmap']  = hostmap


        elif stype == 'radical.entk':

            import radical.entk.utils as reu

            self._profile, accuracy, hostmap \
                              = reu.get_session_profile    (sid=sid, src=self._src)
            self._description = reu.get_session_description(sid=sid, src=self._src)

            self._description['accuracy'] = accuracy
            self._description['hostmap']  = hostmap


        else:
            raise ValueError('unsupported session type [%s]' % stype)


        self._t_start = None
        self._t_stop  = None
        self._ttc     = None
        self._log     = ru.Logger('radical.analytics')
        self._rep     = ru.Reporter('radical.analytics')


        # internal state is represented by a dict of entities:
        # dict keys are entity uids (which are assumed to be unique per
        # session), dict values are ra.Entity instances.
        self._entities = dict()
        if _init:
            self._initialize_entities(self._profile)

        # we do some bookkeeping in self._properties where we keep a list of
        # property values around which we encountered in self._entities.
        self._properties = dict()
        if _init:
            self._initialize_properties()