def list_data(self, ctx, params):
    '''
    List DataPalette entries for one or more workspaces.

    params must contain:
      'workspaces'       - required list of workspace names or numeric ids
                           (given as strings or ints)
      'include_metadata' - optional flag (0/1, default 0) passed through to
                           the palette listing

    Returns {'data': [...], 'data_palette_refs': {ws_id_str: palette_ref}}.
    Raises ValueError when 'workspaces' is missing or is not a list.
    '''
    token = self._extract_token(ctx)
    if 'workspaces' not in params:
        raise ValueError(
            'missing required field "workspaces" in parameters to list_data'
        )
    if not isinstance(params['workspaces'], list):
        raise ValueError('"workspaces" field must be a list')
    workspaces = params['workspaces']
    include_metadata = params.get('include_metadata', 0)
    ws = Workspace(self.ws_url, token=token)
    ws_info_list = []
    if len(workspaces) == 1:
        # Single workspace: look it up directly by id (all-digits) or name.
        workspace = workspaces[0]
        list_params = {}
        if str(workspace).isdigit():
            list_params['id'] = int(workspace)
        else:
            list_params['workspace'] = str(workspace)
        ws_info_list.append(ws.get_workspace_info(list_params))
    else:
        # Multiple workspaces: one list_workspace_info call, then filter by
        # matching either the workspace name (ws_info[1]) or id (ws_info[0]).
        ws_map = {key: True for key in workspaces}
        for ws_info in ws.list_workspace_info({'perm': 'r'}):
            if ws_info[1] in ws_map or str(ws_info[0]) in ws_map:
                ws_info_list.append(ws_info)
    data = []
    dp_list_filter = {'include_metadata': include_metadata}
    data_palette_refs = {}
    for ws_info in ws_info_list:
        dp = DataPalette(None, ws_info=ws_info, ws=ws)
        data = data + dp.list(dp_list_filter)
        # Record the root palette ref for each workspace that has one.
        dp_ref = dp._get_root_data_palette_ref()
        if dp_ref:
            data_palette_refs[str(ws_info[0])] = dp_ref
    data = self._remove_duplicate_data(data)
    return {'data': data, 'data_palette_refs': data_palette_refs}
class NarrativeManager:
    """Manages KBase Narrative objects: listing workspace data (including
    sets and DataPalette entries), copying narratives, and creating new
    temporary narratives."""

    # Cell metadata keys/values used when building narrative cells.
    KB_CELL = 'kb-cell'
    KB_TYPE = 'type'
    KB_APP_CELL = 'kb_app'
    KB_FUNCTION_CELL = 'function_input'
    KB_OUTPUT_CELL = 'function_output'
    KB_ERROR_CELL = 'kb_error'
    KB_CODE_CELL = 'kb_code'
    KB_STATE = 'widget_state'

    DEBUG = False

    DATA_PALETTES_TYPES = DataPaletteTypes(False)

    def __init__(self, config, ctx, set_api_cache, dps_cache):
        self.narrativeMethodStoreURL = config['narrative-method-store']
        self.set_api_cache = set_api_cache  # DynamicServiceCache type
        self.dps_cache = dps_cache  # DynamicServiceCache type
        self.token = ctx["token"]
        self.user_id = ctx["user_id"]
        self.ws = Workspace(config['workspace-url'], token=self.token)
        self.intro_md_file = config['intro-markdown-file']
        # We switch DPs on only for internal Continuous Integration environment for now:
        if config['kbase-endpoint'].startswith("https://ci.kbase.us/"):
            self.DATA_PALETTES_TYPES = DataPaletteTypes(True)

    def list_objects_with_sets(self, ws_id=None, ws_name=None, workspaces=None,
                               types=None, include_metadata=0):
        """List objects (plus set items and DataPalette entries) for the
        given workspaces; exactly one of ws_id / ws_name / workspaces must
        be supplied."""
        if not workspaces:
            if (not ws_id) and (not ws_name):
                raise ValueError(
                    "One and only one of 'ws_id', 'ws_name', 'workspaces' " +
                    "parameters should be set")
            workspaces = [self._get_workspace_name_or_id(ws_id, ws_name)]
        return self._list_objects_with_sets(workspaces, types, include_metadata)

    def _list_objects_with_sets(self, workspaces, types, include_metadata):
        # Optional whitelist of type names (module.Type, no version suffix).
        type_map = None
        if types is not None:
            type_map = {key: True for key in types}
        processed_refs = {}  # ref -> data_item, used for de-duplication
        data = []
        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: processing sets")
        t1 = time.time()
        # 1) Sets, via the SetAPI dynamic service; raw palette data may be
        # piggybacked on this call so we can skip a second service call below.
        set_ret = self.set_api_cache.call_method(
            "list_sets",
            [{'workspaces': workspaces,
              'include_set_item_info': 1,
              'include_raw_data_palettes': 1,
              'include_metadata': include_metadata}],
            self.token)
        sets = set_ret['sets']
        dp_data = set_ret.get('raw_data_palettes')
        dp_refs = set_ret.get('raw_data_palette_refs')
        for set_info in sets:
            # Process
            target_set_items = []
            for set_item in set_info['items']:
                target_set_items.append(set_item['info'])
            if self._check_info_type(set_info['info'], type_map):
                data_item = {'object_info': set_info['info'],
                             'set_items': {'set_items_info': target_set_items}}
                data.append(data_item)
                processed_refs[set_info['ref']] = data_item
        if self.DEBUG:
            print(" (time=" + str(time.time() - t1) + ")")
        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: loading ws_info")
        t2 = time.time()
        ws_info_list = []
        #for ws in workspaces:
        if len(workspaces) == 1:
            # Single workspace: direct lookup by numeric id or by name.
            ws = workspaces[0]
            ws_id = None
            ws_name = None
            if str(ws).isdigit():
                ws_id = int(ws)
            else:
                ws_name = str(ws)
            ws_info_list.append(self.ws.get_workspace_info({"id": ws_id,
                                                            "workspace": ws_name}))
        else:
            # Multiple workspaces: one listing call filtered by name or id.
            ws_map = {key: True for key in workspaces}
            for ws_info in self.ws.list_workspace_info({'perm': 'r'}):
                if ws_info[1] in ws_map or str(ws_info[0]) in ws_map:
                    ws_info_list.append(ws_info)
        if self.DEBUG:
            print(" (time=" + str(time.time() - t2) + ")")
        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: loading workspace objects")
        t3 = time.time()
        # 2) Plain workspace objects, skipping refs already seen as sets.
        for info in WorkspaceListObjectsIterator(
                self.ws,
                ws_info_list=ws_info_list,
                list_objects_params={'includeMetadata': include_metadata}):
            item_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
            if item_ref not in processed_refs and self._check_info_type(info, type_map):
                data_item = {'object_info': info}
                data.append(data_item)
                processed_refs[item_ref] = data_item
        if self.DEBUG:
            print(" (time=" + str(time.time() - t3) + ")")
        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: processing DataPalettes")
        t5 = time.time()
        # 3) DataPalette entries; fetch them now only if the SetAPI call did
        # not already return the raw palette data.
        if dp_data is None or dp_refs is None:
            dps = self.dps_cache
            dp_ret = dps.call_method("list_data",
                                     [{'workspaces': workspaces,
                                       'include_metadata': include_metadata}],
                                     self.token)
            dp_data = dp_ret['data']
            dp_refs = dp_ret['data_palette_refs']
        for item in dp_data:
            ref = item['ref']
            if self._check_info_type(item['info'], type_map):
                data_item = None
                if ref in processed_refs:
                    # Already listed as a set/object: just attach palette info.
                    data_item = processed_refs[ref]
                else:
                    data_item = {'object_info': item['info']}
                    processed_refs[ref] = data_item
                    data.append(data_item)
                dp_info = {}
                if 'dp_ref' in item:
                    dp_info['ref'] = item['dp_ref']
                if 'dp_refs' in item:
                    dp_info['refs'] = item['dp_refs']
                data_item['dp_info'] = dp_info
        if self.DEBUG:
            print(" (time=" + str(time.time() - t5) + ")")
        return {"data": data, 'data_palette_refs': dp_refs}

    def _check_info_type(self, info, type_map):
        """True if the object_info tuple's type (version suffix stripped)
        is in type_map, or if no filter is set."""
        if type_map is None:
            return True
        obj_type = info[2].split('-')[0]
        return type_map.get(obj_type, False)

    def copy_narrative(self, newName, workspaceRef, workspaceId):
        """Clone the workspace holding the narrative at workspaceRef into a
        new workspace named <user>:narrative_<millis>, excluding the
        narrative object itself and DataPalette-handled objects (which are
        copied through the DataPaletteService instead), then save a fresh
        copy of the narrative there. Returns {'newWsId', 'newNarId'}."""
        time_ms = int(round(time.time() * 1000))
        newWsName = self.user_id + ':narrative_' + str(time_ms)
        # add the 'narrative' field to newWsMeta later.
        newWsMeta = {"is_temporary": "false", "narrative_nice_name": newName}
        # start with getting the existing narrative object.
        currentNarrative = self.ws.get_objects([{'ref': workspaceRef}])[0]
        if not workspaceId:
            workspaceId = currentNarrative['info'][6]
        # Let's prepare exceptions for clone the workspace.
        # 1) currentNarrative object:
        excluded_list = [{'objid': currentNarrative['info'][0]}]
        # 2) let's exclude objects of types under DataPalette handling:
        data_palette_type = "DataPalette.DataPalette"
        excluded_types = [data_palette_type]
        excluded_types.extend(self.DATA_PALETTES_TYPES.keys())
        add_to_palette_list = []
        dp_detected = False
        for obj_type in excluded_types:
            list_objects_params = {'type': obj_type}
            if obj_type == data_palette_type:
                list_objects_params['showHidden'] = 1
            for info in WorkspaceListObjectsIterator(
                    self.ws,
                    ws_id=workspaceId,
                    list_objects_params=list_objects_params):
                if obj_type == data_palette_type:
                    dp_detected = True
                else:
                    add_to_palette_list.append({
                        'ref': str(info[6]) + '/' + str(info[0]) + '/' +
                               str(info[4])
                    })
                excluded_list.append({'objid': info[0]})
        # clone the workspace EXCEPT for currentNarrative object + obejcts of DataPalette types:
        newWsId = self.ws.clone_workspace({'wsi': {'id': workspaceId},
                                           'workspace': newWsName,
                                           'meta': newWsMeta,
                                           'exclude': excluded_list})[0]
        try:
            if dp_detected:
                self.dps_cache.call_method(
                    "copy_palette",
                    [{'from_workspace': str(workspaceId),
                      'to_workspace': str(newWsId)}],
                    self.token)
            if len(add_to_palette_list) > 0:
                # There are objects in source workspace that have type under DataPalette handling
                # but these objects are physically stored in source workspace rather that saved
                # in DataPalette object. So they weren't copied by "dps.copy_palette".
                self.dps_cache.call_method("add_to_palette",
                                           [{'workspace': str(newWsId),
                                             'new_refs': add_to_palette_list}],
                                           self.token)
            # update the ref inside the narrative object and the new workspace metadata.
            newNarMetadata = currentNarrative['info'][10]
            newNarMetadata['name'] = newName
            newNarMetadata['ws_name'] = newWsName
            newNarMetadata['job_info'] = json.dumps({'queue_time': 0,
                                                     'running': 0,
                                                     'completed': 0,
                                                     'run_time': 0,
                                                     'error': 0})
            currentNarrative['data']['metadata']['name'] = newName
            currentNarrative['data']['metadata']['ws_name'] = newWsName
            currentNarrative['data']['metadata']['job_ids'] = {
                'apps': [],
                'methods': [],
                'job_usage': {'queue_time': 0, 'run_time': 0}
            }
            # save the shiny new Narrative so it's at version 1
            newNarInfo = self.ws.save_objects({
                'id': newWsId,
                'objects': [{'type': currentNarrative['info'][2],
                             'data': currentNarrative['data'],
                             'provenance': currentNarrative['provenance'],
                             'name': currentNarrative['info'][1],
                             'meta': newNarMetadata}]
            })
            # now, just update the workspace metadata to point
            # to the new narrative object
            newNarId = newNarInfo[0][0]
            self.ws.alter_workspace_metadata({'wsi': {'id': newWsId},
                                              'new': {'narrative': str(newNarId)}})
            return {'newWsId': newWsId, 'newNarId': newNarId}
        except:
            # let's delete copy of workspace so it's out of the way - it's broken
            self.ws.delete_workspace({'id': newWsId})
            raise  # continue raising previous exception

    def create_new_narrative(self, app, method, appparam, appData, markdown,
                             copydata, importData, includeIntroCell):
        """Create a new temporary narrative, seeded with at most one app,
        method or markdown cell. copydata/appparam are the ';'-separated
        string forms of importData/appData."""
        if app and method:
            raise ValueError("Must provide no more than one of the app or method params")
        if (not importData) and copydata:
            importData = copydata.split(';')
        if (not appData) and appparam:
            # Each appparam item is "step_pos,param_id,value".
            appData = []
            for tmp_item in appparam.split(';'):
                tmp_tuple = tmp_item.split(',')
                step_pos = None
                if tmp_tuple[0]:
                    try:
                        step_pos = int(tmp_tuple[0])
                    except ValueError:
                        pass
                appData.append([step_pos, tmp_tuple[1], tmp_tuple[2]])
        cells = None
        if app:
            cells = [{"app": app}]
        elif method:
            cells = [{"method": method}]
        elif markdown:
            cells = [{"markdown": markdown}]
        return self._create_temp_narrative(cells, appData, importData, includeIntroCell)

    def _get_intro_markdown(self):
        """ Creates and returns a cell with the introductory text included. """
        # Load introductory markdown text
        with open(self.intro_md_file) as intro_file:
            intro_md = intro_file.read()
        return intro_md

    def _create_temp_narrative(self, cells, parameters, importData, includeIntroCell):
        # Migration to python of JavaScript class from
        # https://github.com/kbase/kbase-ui/blob/4d31151d13de0278765a69b2b09f3bcf0e832409/src/client/modules/plugins/narrativemanager/modules/narrativeManager.js#L414
        narr_id = int(round(time.time() * 1000))
        workspaceName = self.user_id + ':narrative_' + str(narr_id)
        narrativeName = "Narrative." + str(narr_id)
        ws = self.ws
        ws_info = ws.create_workspace({'workspace': workspaceName,
                                       'description': ''})
        newWorkspaceInfo = ServiceUtils.workspaceInfoToObject(ws_info)
        [narrativeObject, metadataExternal] = self._fetchNarrativeObjects(
            workspaceName, cells, parameters, includeIntroCell)
        objectInfo = ws.save_objects({
            'workspace': workspaceName,
            'objects': [{'type': 'KBaseNarrative.Narrative',
                         'data': narrativeObject,
                         'name': narrativeName,
                         'meta': metadataExternal,
                         'provenance': [{'script': 'NarrativeManager.py',
                                         'description': 'Created new ' +
                                         'Workspace/Narrative bundle.'}],
                         'hidden': 0}]
        })[0]
        objectInfo = ServiceUtils.objectInfoToObject(objectInfo)
        self._completeNewNarrative(newWorkspaceInfo['id'], objectInfo['id'],
                                   importData)
        return {'workspaceInfo': newWorkspaceInfo, 'narrativeInfo': objectInfo}

    def _fetchNarrativeObjects(self, workspaceName, cells, parameters, includeIntroCell):
        """Build the narrative (notebook) object plus its external metadata
        dict (all values stringified for the workspace)."""
        if not cells:
            cells = []
        # fetchSpecs
        appSpecIds = []
        methodSpecIds = []
        specMapping = {'apps': {}, 'methods': {}}
        for cell in cells:
            if 'app' in cell:
                appSpecIds.append(cell['app'])
            elif 'method' in cell:
                methodSpecIds.append(cell['method'])
        nms = NarrativeMethodStore(self.narrativeMethodStoreURL, token=self.token)
        if len(appSpecIds) > 0:
            appSpecs = nms.get_app_spec({'ids': appSpecIds})
            for spec in appSpecs:
                spec_id = spec['info']['id']
                specMapping['apps'][spec_id] = spec
        if len(methodSpecIds) > 0:
            methodSpecs = nms.get_method_spec({'ids': methodSpecIds})
            for spec in methodSpecs:
                spec_id = spec['info']['id']
                specMapping['methods'][spec_id] = spec
        # end of fetchSpecs
        metadata = {'job_ids': {'methods': [],
                                'apps': [],
                                'job_usage': {'queue_time': 0, 'run_time': 0}},
                    'format': 'ipynb',
                    'creator': self.user_id,
                    'ws_name': workspaceName,
                    'name': 'Untitled',
                    'type': 'KBaseNarrative.Narrative',
                    'description': '',
                    'data_dependencies': []}
        cellData = self._gatherCellData(cells, specMapping, parameters, includeIntroCell)
        narrativeObject = {'nbformat_minor': 0,
                           'cells': cellData,
                           'metadata': metadata,
                           'nbformat': 4}
        # Workspace metadata values must be strings: serialize the non-string ones.
        metadataExternal = {}
        for key in metadata:
            value = metadata[key]
            if isinstance(value, basestring):
                metadataExternal[key] = value
            else:
                metadataExternal[key] = json.dumps(value)
        return [narrativeObject, metadataExternal]

    def _gatherCellData(self, cells, specMapping, parameters, includeIntroCell):
        """Turn the requested cell descriptors into notebook cell dicts,
        optionally prefixed by the intro markdown cell."""
        cell_data = []
        if includeIntroCell == 1:
            cell_data.append({'cell_type': 'markdown',
                              'source': self._get_intro_markdown(),
                              'metadata': {}})
        for cell_pos, cell in enumerate(cells):
            if 'app' in cell:
                cell_data.append(self._buildAppCell(
                    len(cell_data), specMapping['apps'][cell['app']], parameters))
            elif 'method' in cell:
                cell_data.append(self._buildMethodCell(
                    len(cell_data), specMapping['methods'][cell['method']], parameters))
            elif 'markdown' in cell:
                cell_data.append({'cell_type': 'markdown',
                                  'source': cell['markdown'],
                                  'metadata': {}})
            else:
                raise ValueError("cannot add cell #" + str(cell_pos) +
                                 ", unrecognized cell content")
        return cell_data

    def _buildAppCell(self, pos, spec, params):
        """Build a markdown cell whose source instantiates the (legacy)
        kbaseNarrativeAppCell JS widget for the given app spec."""
        cellId = 'kb-cell-' + str(pos) + '-' + str(uuid.uuid4())
        cell = {'cell_type': 'markdown',
                'source': "<div id='" + cellId + "'></div>" +
                          "\n<script>" +
                          "$('#" + cellId + "').kbaseNarrativeAppCell({'appSpec' : '" +
                          self._safeJSONStringify(spec) + "', 'cellId' : '" +
                          cellId + "'});" +
                          "</script>",
                'metadata': {}}
        cellInfo = {}
        widgetState = []
        cellInfo[self.KB_TYPE] = self.KB_APP_CELL
        cellInfo['app'] = spec
        if params:
            # params items are [step_pos, param_id, value].
            steps = {}
            for param in params:
                stepid = 'step_' + str(param[0])
                if stepid not in steps:
                    steps[stepid] = {}
                    steps[stepid]['inputState'] = {}
                steps[stepid]['inputState'][param[1]] = param[2]
            state = {'state': {'step': steps}}
            widgetState.append(state)
        cellInfo[self.KB_STATE] = widgetState
        cell['metadata'][self.KB_CELL] = cellInfo
        return cell

    def _buildMethodCell(self, pos, spec, params):
        """Build a markdown cell whose source instantiates the (legacy)
        kbaseNarrativeMethodCell JS widget for the given method spec."""
        cellId = 'kb-cell-' + str(pos) + '-' + str(uuid.uuid4())
        cell = {'cell_type': 'markdown',
                'source': "<div id='" + cellId + "'></div>" +
                          "\n<script>" +
                          "$('#" + cellId + "').kbaseNarrativeMethodCell({'method' : '" +
                          self._safeJSONStringify(spec) + "'});" +
                          "</script>",
                'metadata': {}}
        cellInfo = {'method': spec, 'widget': spec['widgets']['input']}
        cellInfo[self.KB_TYPE] = self.KB_FUNCTION_CELL
        widgetState = []
        if params:
            wparams = {}
            for param in params:
                wparams[param[1]] = param[2]
            widgetState.append({'state': wparams})
        cellInfo[self.KB_STATE] = widgetState
        cell['metadata'][self.KB_CELL] = cellInfo
        return cell

    def _completeNewNarrative(self, workspaceId, objectId, importData):
        """Mark the new workspace as a temporary narrative and copy any
        requested objects (importData refs) into it."""
        self.ws.alter_workspace_metadata({'wsi': {'id': workspaceId},
                                          'new': {'narrative': str(objectId),
                                                  'is_temporary': 'true'}})
        # copy_to_narrative:
        if not importData:
            return
        objectsToCopy = [{'ref': x} for x in importData]
        infoList = self.ws.get_object_info_new({'objects': objectsToCopy,
                                                'includeMetadata': 0})
        for item in infoList:
            objectInfo = ServiceUtils.objectInfoToObject(item)
            self.copy_object(objectInfo['ref'], workspaceId, None, None, objectInfo)

    def _safeJSONStringify(self, obj):
        """JSON-serialize obj with quotes escaped so the result can be
        embedded in the single-quoted JS/HTML built by _buildAppCell and
        _buildMethodCell."""
        return json.dumps(self._safeJSONStringifyPrepare(obj))

    def _safeJSONStringifyPrepare(self, obj):
        # Recursively replace quote characters in every string with HTML
        # entities. NOTE: the previous version did
        # obj.replace("'", "'") - a no-op (the entity text had been lost),
        # which left quotes unescaped and broke the generated JS snippet.
        if isinstance(obj, basestring):
            return obj.replace("'", "&apos;").replace('"', "&quot;")
        elif isinstance(obj, list):
            for pos in range(len(obj)):
                obj[pos] = self._safeJSONStringifyPrepare(obj[pos])
        elif isinstance(obj, dict):
            obj_keys = list(obj.keys())
            for key in obj_keys:
                obj[key] = self._safeJSONStringifyPrepare(obj[key])
        else:
            pass  # it's boolean/int/float/None
        return obj

    def _get_workspace_name_or_id(self, ws_id, ws_name):
        """Prefer the workspace name; fall back to the stringified id."""
        ret = ws_name
        if not ret:
            ret = str(ws_id)
        return ret

    def copy_object(self, ref, target_ws_id, target_ws_name, target_name, src_info):
        """Copy one object into the target workspace, going through the
        DataPaletteService when the object's type is palette-handled."""
        # There should be some logic related to DataPalettes
        if (not target_ws_id) and (not target_ws_name):
            raise ValueError("Neither target workspace ID nor name is defined")
        if not src_info:
            src_info_tuple = self.ws.get_object_info_new({'objects': [{'ref': ref}],
                                                          'includeMetadata': 0})[0]
            src_info = ServiceUtils.objectInfoToObject(src_info_tuple)
        type_name = src_info['typeModule'] + '.' + src_info['typeName']
        type_config = self.DATA_PALETTES_TYPES.get(type_name)
        if type_config is not None:
            # Copy with DataPaletteService
            if target_name:
                raise ValueError("'target_name' cannot be defined for DataPalette copy")
            target_ws_name_or_id = self._get_workspace_name_or_id(target_ws_id,
                                                                  target_ws_name)
            self.dps_cache.call_method("add_to_palette",
                                       [{'workspace': target_ws_name_or_id,
                                         'new_refs': [{'ref': ref}]}],
                                       self.token)
            return {'info': src_info}
        else:
            if not target_name:
                target_name = src_info['name']
            obj_info_tuple = self.ws.copy_object({
                'from': {'ref': ref},
                'to': {'wsid': target_ws_id,
                       'workspace': target_ws_name,
                       'name': target_name}
            })
            obj_info = ServiceUtils.objectInfoToObject(obj_info_tuple)
            return {'info': obj_info}

    def list_available_types(self, workspaces):
        """Count objects per type (version stripped) across the given
        workspaces; returns {'type_stat': {type_name: count}}."""
        data = self.list_objects_with_sets(workspaces=workspaces)['data']
        type_stat = {}
        for item in data:
            info = item['object_info']
            obj_type = info[2].split('-')[0]
            if obj_type in type_stat:
                type_stat[obj_type] += 1
            else:
                type_stat[obj_type] = 1
        return {'type_stat': type_stat}
class UJS_CAT_NJS_DataUtils:
    # Aggregates job/user metrics from several KBase services: UserAndJobState
    # (UJS), Catalog, NarrativeJobService (NJS), Workspace and the kb_Metrics
    # dynamic service.

    def __init__(self, workspace_url, job_service_url, srv_wiz_url, njsw_url,
                 auth_service_url, kbase_endpoint, provenance, token):
        self.workspace_url = workspace_url
        self.job_service_url = job_service_url
        self.njsw_url = njsw_url
        self.auth_service_url = auth_service_url
        self.srv_wiz_url = srv_wiz_url
        self.catalog_url = kbase_endpoint + '/catalog'
        self.user_profile_url = kbase_endpoint + '/user_profile/rpc'
        self.provenance = provenance
        # initialize service clients
        self.ws_client = Workspace(self.workspace_url)
        self.cat_client = Catalog(self.catalog_url,
                                  auth_svc=self.auth_service_url)
        self.njs_client = NarrativeJobService(self.njsw_url,
                                              auth_svc=self.auth_service_url)
        self.ujs_client = UserAndJobState(self.job_service_url,
                                          auth_svc=self.auth_service_url)
        self.uprf_client = UserProfile(self.user_profile_url,
                                       auth_svc=self.auth_service_url)
        self.met_client = kb_Metrics(self.srv_wiz_url, token=token,
                                     auth_svc=self.auth_service_url,
                                     service_ver='dev')
        # self.met_url = 'https://ci.kbase.us/dynserv/feab1281c921b3a34f61cc8a11814eebf15c88d1.kb-Metrics'
        # self.met_client = kb_Metrics(url=self.met_url, auth_svc=self.auth_service_url, token=token)

    def get_user_metrics(self, input_params):
        """ get_user_metrics: call the dynamic service kb_Metrics to retrieve
        user metrics and return the following data structure, e.g., ... """
        # log("Fetching the metrics data")
        ret_metrics = []
        params = self.process_met_parameters(input_params)
        user_ids = params['user_ids']
        time_start = params['minTime']
        time_end = params['maxTime']
        stats_name = params['stats_name']
        # Dispatch to the kb_Metrics endpoint selected by stats_name; every
        # endpoint takes the same {user_ids, epoch_range} argument.
        try:
            if stats_name == 'user_details':
                ret_metrics = self.met_client.get_user_details({
                    'user_ids': user_ids,
                    'epoch_range': (time_start, time_end)
                })
                # Convert millisecond timestamps into UTC dates for display.
                ret_metrics['metrics_result'] = _convert_millis_to_utcdate(
                    ret_metrics['metrics_result'],
                    ['signup_at', 'last_signin_at'])
            elif stats_name == 'user_counts_per_day':
                print("Trying to get unique user counts")
                ret_metrics = self.met_client.get_user_counts_per_day({
                    'user_ids': user_ids,
                    'epoch_range': (time_start, time_end)
                })
            elif stats_name == 'user_ws':
                ret_metrics = self.met_client.get_user_ws({
                    'user_ids': user_ids,
                    'epoch_range': (time_start, time_end)
                })
            elif stats_name == 'user_narratives':
                ret_metrics = self.met_client.get_user_narratives({
                    'user_ids': user_ids,
                    'epoch_range': (time_start, time_end)
                })
            elif stats_name == 'user_numObjs':
                ret_metrics = self.met_client.get_user_numObjs({
                    'user_ids': user_ids,
                    'epoch_range': (time_start, time_end)
                })
            elif stats_name == 'total_logins':
                ret_metrics = self.met_client.get_total_logins({
                    'user_ids': user_ids,
                    'epoch_range': (time_start, time_end)
                })
            else:
                pass
        except Exception as e_met:  # RuntimeError
            log('UJS_CAT_NJS_DataUtils.get_user_metrics raised error:')
            log(e_met)
            return {'metrics_result': []}
        else:
            # no exception raised, process the data returned from the service call
            if (len(ret_metrics) > 1):
                log(pformat(ret_metrics[:2]))
        return ret_metrics

    def get_app_metrics(self, input_params):
        """ get_app_metrics: call the dynamic service kb_Metrics to retrieve
        app metrics """
        # log("Fetching the metrics data")
        ret_metrics = []
        params = self.process_met_parameters(input_params)
        user_ids = params['user_ids']
        time_start = params['minTime']
        time_end = params['maxTime']
        try:
            ret_metrics = self.met_client.get_app_metrics({
                'user_ids': user_ids,
                'epoch_range': (time_start, time_end)
            })
        except Exception as e_met:  # RuntimeError
            log('kb_Metrics.get_app_metrics raised error:')
            log(e_met)
            return []
        else:
            # no exception raised, process the data returned from the service call
            if (len(ret_metrics) > 1):
                log(pformat(ret_metrics[:2]))
        return ret_metrics

    def generate_app_metrics_from_ujs(self, input_params):  # , token):
        """ generate_app_metrics: get app job state data with structure as
        the following example: """
        params = self.process_app_parameters(input_params)
        user_ids = params['user_ids']
        time_start = params['time_start']
        time_end = params['time_end']
        job_stage = params['job_stage']
        # Find the users' workspaces, then pull UJS job states for them and
        # filter down to the requested stage/time window.
        ws_owners, ws_ids = self.get_user_workspaces(user_ids, time_start,
                                                     time_end, 0, 0)
        ujs_ret = self.get_user_and_job_states(ws_ids)
        total_ujs_count = len(ujs_ret)
        # log("Before time_stage filter:{}".format(total_ujs_count))
        jt_filtered_ujs = self.filterUJS_by_time_stage(ujs_ret, job_stage,
                                                       time_start, time_end)
        period_ujs_count = len(jt_filtered_ujs)
        jt_filtered_ujs = self.convert_time_info(jt_filtered_ujs)
        # log("After time_stage filter:{}".format(period_ujs_count))
        # user_grouped_ujs = self.group_by_user(jt_filtered_ujs, user_ids)
        return {'job_states': jt_filtered_ujs}

    def get_user_workspaces(self, user_ids, st_time, ed_time, showDeleted=0,
                            showOnlyDeleted=0):
        """ get_user_workspaces: given the user ids, get a list of data
        structure as the example below:
        typedef tuple<ws_id id, ws_name workspace, username owner,
            timestamp moddate, int max_objid, permission user_permission,
            permission globalread, lock_status lockstat,
            usermeta metadata> workspace_info;
        ws_info = self.ws_client.list_workspace_info({'owners':user_ids,
            'showDeleted': showDeleted, 'showOnlyDeleted': showOnlyDeleted,
            'perm':'r', 'excludeGlobal': 1,
            'after': '2017-04-03T08:56:32Z', 'before': '2017-11-03T08:56:32Z'})
        return a list of ws_owners and ws_ids
        """
        # log("Fetching workspace ids for {} users:\n{}".format('the' if user_ids else 'all', user_ids if user_ids else ''))
        # ws_info = self.ws_client.list_workspace_info({})
        ws_info = self.ws_client.list_workspace_info({
            'owners': user_ids,
            'showDeleted': showDeleted,
            'showOnlyDeleted': showOnlyDeleted,
            'perm': 'r',
            'after': st_time.strftime("%Y-%m-%dT%H:%M:%SZ"),
            'before': ed_time.strftime("%Y-%m-%dT%H:%M:%SZ")
        })
        # log(pformat(ws_info))
        # workspace_info tuple: [0] = id, [2] = owner.
        ws_ids = [ws[0] for ws in ws_info]
        ws_owners = [ws[2] for ws in ws_info]
        return (ws_owners, ws_ids)

    def get_user_and_job_states(self, ws_ids):
        """ get_user_and_job_states: Get the user and job info for the given
        workspaces """
        # log("Fetching the job data...for these workspaces:\n{}".format(pformat(ws_ids)))
        wsj_states = []
        clnt_groups = self.get_client_groups_from_cat()
        # Query UJS in batches of 10 workspace ids, then once more for the
        # remainder.
        counter = 0
        while counter < len(ws_ids) // 10:
            j_states = []
            wid_slice = ws_ids[counter * 10:(counter + 1) * 10]
            wsj_states += self.retrieve_user_job_states(wid_slice, clnt_groups)
            counter += 1
        wsj_states += self.retrieve_user_job_states(ws_ids[counter * 10:],
                                                    clnt_groups)
        # log(pformat(wsj_states[0]))
        return wsj_states

    def retrieve_user_job_states(self, wid_p, c_groups):
        """ call ujs_client.list_jobs2() that returns an array of job_info2:
        typedef tuple<job_id job, user_info users, service_name service,
            job_stage stage, job_status status, time_info times,
            progress_info progress, boolean complete, boolean error,
            auth_info auth, usermeta meta, job_description desc,
            Results res> job_info2;
        retrieve_user_job_states: returns an array of required data items
        about user_and_job states
        """
        # log("Fetching the ujs data for workspace(s) {}...".format(pformat(wid_p)))
        ret_ujs = []
        try:
            nar_jobs = self.ujs_client.list_jobs2({
                'filter': 'S',  # all jobs are returned
                'authstrat': 'kbaseworkspace',
                'authparams': wid_p
            })
        except Exception as e_ujs:  # RuntimeError as e_ujs:
            log('UJS list_jobs2 raised error:\n')
            log(pformat(e_ujs))
            return []
        else:  # no exception raised
            if (nar_jobs and len(nar_jobs) > 0):
                # ******The ujs_client.list_jobs2({...}) returns a 13 member tuple:*****#
                # Unzip the job_info2 tuples into per-field lists.
                job_ids = [j[0] for j in nar_jobs]  # [u'59f36d00e4b0fb0c767100cc',...]
                job_user_info = [j[1] for j in nar_jobs]  # [[u'qzhang', None],[u'qzhang', u'qzhang'],...]
                job_owners = [j[2] for j in nar_jobs]  # [u'qzhang',u'qzhang',...]
                job_stages = [j[3] for j in nar_jobs]  # One of 'created', 'started', 'complete', 'canceled' or 'error'
                job_status = [j[4] for j in nar_jobs]  # [u'done','running','canceled by user','......',...]
                job_time_info = [j[5] for j in nar_jobs]  # tuple<timestamp started, timestamp last_update, timestamp est_complete>
                job_progress_info = [j[6] for j in nar_jobs]  # tuple<total_progress prog, max_progress max, progress_type ptype>
                job_complete = [j[7] for j in nar_jobs]  # [1,1,...,0,..]
                job_error = [j[8] for j in nar_jobs]  # [1,0,...,0,..]
                job_auth_info = [j[9] for j in nar_jobs]  # [[u'kbaseworkspace', u'25735'],...]
                job_meta = [j[10] for j in nar_jobs]  # [{u'cell_id': ..., u'run_id': ..., u'tag': ..., u'token_id': ...},...]
                job_desc = [j[11] for j in nar_jobs]  # [u'Execution engine job for kb_Metrics.count_ncbi_genome_features',...]
                job_res = [j[12] for j in nar_jobs]  # [{},None,...]
                # Enrich the raw UJS fields with NJS job state data.
                ret_ujs = self.retrieve_ujs_via_njs(c_groups, job_ids,
                                                    job_owners, job_stages,
                                                    job_status, job_time_info,
                                                    job_error, job_desc)
        return ret_ujs

    def retrieve_ujs_via_njs(self, c_groups, job_ids, job_owners, job_stages,
                             job_status, job_time_info, job_error, job_desc):
        # Cross-reference the UJS job lists with NJS check_jobs results and
        # Catalog client groups to build one summary dict per job.
        ujs_ret = []
        try:
            # log("Calling njs.check_jobs for {} jobs".format(len(job_ids)))
            job_info = self.njs_client.check_jobs({
                'job_ids': job_ids,
                'with_job_params': 1
            })
        except Exception as e_njs:  # RuntimeError as e_njs:
            log('NJS check_jobs raised error:\n')
            log(pformat(e_njs))
            return []
        else:  # no exception raised
            job_states = job_info.get('job_states', {})
            job_params = job_info.get('job_params', {})
            job_errors = job_info.get('check_error', {})
            # Retrieve the interested data from job_states to assemble an array of job states
            # for j_id, j_owner in zip(job_ids, job_owners):
            for j_idx, jb_id in enumerate(job_ids):
                jbs = job_states.get(job_ids[j_idx], {})
                jbp = job_params.get(job_ids[j_idx], {})
                u_j_s = {}
                u_j_s['job_id'] = job_ids[j_idx]
                u_j_s['user_id'] = job_owners[j_idx]
                u_j_s['status'] = job_status[j_idx]
                u_j_s['stage'] = job_stages[j_idx]
                u_j_s['time_info'] = job_time_info[j_idx]
                u_j_s['error'] = job_error[j_idx]
                u_j_s['job_desc'] = job_desc[j_idx]
                if jbs:
                    # NJS knows this job: take app/module/timing data from it.
                    try:
                        u_j_s['app_id'] = jbp['app_id']
                        for clnt in c_groups:
                            if u_j_s['app_id'] == clnt['app_id']:
                                u_j_s['client_groups'] = clnt['client_groups']
                                break
                        u_j_s['wsid'] = jbp['wsid']
                        u_j_s['module'], u_j_s['method'] = jbp['method'].split('.')
                        u_j_s['job_state'] = jbs['job_state']
                        if jbs['job_state'] == 'suspend':
                            u_j_s['error'] = jbs['error']
                        elif (jbs['job_state'] == 'completed'
                              and 'result' in u_j_s):
                            u_j_s['result'] = jbs['result']
                        u_j_s['finished'] = jbs['finished']
                        u_j_s['canceled'] = jbs['canceled']
                        u_j_s['creation_time'] = jbs['creation_time']
                        if 'exec_start_time' in jbs:
                            u_j_s['exec_start_time'] = jbs['exec_start_time']
                        elif u_j_s['stage'] == 'started':
                            # fall back to UJS last_update time
                            u_j_s['exec_start_time'] = u_j_s['time_info'][1]
                        if 'finish_time' in jbs:
                            u_j_s['finish_time'] = jbs['finish_time']
                        elif (u_j_s['stage'] == 'completed'
                              or u_j_s['stage'] == 'complete'):
                            u_j_s['finish_time'] = u_j_s['time_info'][1]
                    except KeyError as e_key:
                        log("KeyError for " + pformat(e_key))
                    else:
                        pass
                else:
                    # log("No job state info is returned by njs for job with id {}".format(job_ids[j_idx]))
                    # log("\nBut maybe ujs has returned something for job with id {}".format(job_ids[j_idx]))
                    # log(pformat(job_stages[j_idx]))
                    # NJS has nothing: derive timing from the UJS time_info.
                    u_j_s['creation_time'] = _timestamp_from_utc(
                        u_j_s['time_info'][0])
                    if (u_j_s['stage'] == 'started'
                            and u_j_s['status'] == 'running'):
                        u_j_s['exec_start_time'] = _timestamp_from_utc(
                            u_j_s['time_info'][1])
                    elif (u_j_s['stage'] == 'completed'
                          or u_j_s['stage'] == 'complete'
                          or u_j_s['job_state'] == 'completed'
                          or u_j_s['status'] == 'done'):
                        u_j_s['finish_time'] = _timestamp_from_utc(
                            u_j_s['time_info'][1])
                    # get some info from the client groups
                    for clnt in c_groups:
                        if clnt['function_name'] in u_j_s['job_desc']:
                            u_j_s['app_id'] = clnt['app_id']
                            u_j_s['client_groups'] = clnt['client_groups']
                            u_j_s['module'] = clnt['module_name']
                            u_j_s['method'] = clnt['function_name']
                            break
                    # log("*******From ujs result directly*******:\n")
                    # log(pformat(u_j_s))
                # Compute elapsed-time strings (running/run/queued) from the
                # millisecond timestamps, with sub-second precision dropped.
                if ('exec_start_time' in u_j_s
                        and u_j_s['stage'] == 'started'
                        and u_j_s['status'] == 'running'):
                    delta = (datetime.datetime.utcnow() -
                             datetime.datetime.fromtimestamp(
                                 u_j_s['exec_start_time'] / 1000))
                    delta = delta - datetime.timedelta(
                        microseconds=delta.microseconds)
                    u_j_s['running_time'] = str(delta)  # delta.total_seconds()
                elif ('finish_time' in u_j_s
                      and 'exec_start_time' in u_j_s
                      and u_j_s['status'] == 'done'):
                    delta = (datetime.datetime.fromtimestamp(
                                 u_j_s['finish_time'] / 1000) -
                             datetime.datetime.fromtimestamp(
                                 u_j_s['exec_start_time'] / 1000))
                    delta = delta - datetime.timedelta(
                        microseconds=delta.microseconds)
                    u_j_s['run_time'] = str(delta)  # delta.total_seconds()
                elif (u_j_s['stage'] == 'created'
                      and 'creation_time' in u_j_s
                      and u_j_s['status'] not in
                      ['done', 'running', 'canceled by user', 'error']
                      and job_error[j_idx] == {}):
                    delta = (datetime.datetime.utcnow() -
                             datetime.datetime.fromtimestamp(
                                 u_j_s['creation_time'] / 1000))
                    delta = delta - datetime.timedelta(
                        microseconds=delta.microseconds)
                    u_j_s['queued_time'] = str(delta)  # delta.total_seconds()
                    u_j_s['status'] = 'queued'
                else:
                    u_j_s['status'] = 'not created'
                ujs_ret.append(u_j_s)
        # log("Job count={}".format(len(ujs_ret)))
        return ujs_ret

    def get_exec_stats_from_cat(self):
        """ get_exec_stats_from_cat: Get stats on completed jobs
        return an array of the following structure (example with data):
        {
             u'app_id': u'describe_rnaseq_experiment',
             u'app_module_name': u'KBaseRNASeq',
             u'creation_time': 1456863947.568,
             u'exec_start_time': 1456863953.739,
             u'finish_time': 1456863955.138,
             u'func_module_name': u'KBaseRNASeq',
             u'func_name': u'SetupRNASeqAnalysis',
             u'git_commit_hash': u'5de844e7303a8a30a94d4ca40f2b341439b8bb3c',
             u'is_error': True,
             u'user_id': u'srividya22'
        }
        """
        try:
            # log("Fetching the exec stats data from Catalog API...")
            raw_stats = self.cat_client.get_exec_raw_stats({})
        except Exception as e_raw:  # RuntimeError:
            log('kb_Metrics.get_exec_stats_from_cat raised error:')
            log(pformat(e_raw))
            return []
        else:
            # Calculate queued_time and run_time (in seconds)
            for elem in raw_stats:
                tc = elem['creation_time']
                ts = elem['exec_start_time']
                tf = elem['finish_time']
                elem['queued_time'] = ts - tc
                elem['run_time'] = tf - ts
            log(pformat(raw_stats[0]))
        return raw_stats

    def get_client_groups_from_cat(self):
        """ get_client_groups_from_cat: Get the client_groups data from
        Catalog API
        return an array of the following structure (example with data):
        {
            u'app_id': u'assemblyrast/run_arast',
            u'client_groups': [u'bigmemlong'],
            u'function_name': u'run_arast',
            u'module_name': u'AssemblyRAST'},
        }
        """
        # Pull the data
        client_groups = self.cat_client.get_client_groups({})
        # log("\nClient group example:\n{}".format(pformat(client_groups[0])))
        return client_groups

    def get_exec_aggrTable_from_cat(self):
        """ get_exec_stats_from_cat: Get stats on completed jobs
        return an array of the following structure (example with data):
        {
             u'app': u'kb_uploadmethods/import_sra_as_reads_from_web',
             u'func': u'import_sra_from_web',
             u'func_mod': u'kb_uploadmethods',
             u'n': 5,
             u'user': u'umaganapathyswork'
        }
        """
        try:
            # log("Fetching the exec_aggr table data from Catalog API...")
            aggr_tab = self.cat_client.get_exec_aggr_table({})
        except Exception as e_aggr:  # RuntimeError:
            log('kb_Metrics.get_exec_aggrTable_from_cat raised error:')
            log(pformat(e_aggr))
            return []
        else:
            log(pformat(aggr_tab[0]))
        return aggr_tab

    def get_exec_aggrStats_from_cat(self):
        """ get_exec_aggr_from_cat: Get stats on aggregated execution results
        of KBase apps
        return an array of the following structure (example with data):
        {
             u'full_app_id': u'KBaseRNASeq/describe_rnaseq_experiment',
             u'module_name': u'KBaseRNASeq',
             u'number_of_calls': 689,
             u'number_of_errors': 117,
             u'time_range': u'*',
             u'total_exec_time': 10.807103612158034,
             u'total_queue_time': 127.90380222181479,
             u'type': u'a'
        }
        """
        # Pull the data
        try:
            # log("Fetching the exec_aggr stats data from Catalog API...")
            aggr_stats = self.cat_client.get_exec_aggr_stats({})
        except Exception as e_aggr:  # RuntimeError:
            log('kb_Metrics.get_exec_aggrStats_from_cat raised error:')
            log(pformat(e_aggr))
            return []
        else:
            # Convert time from seconds to hours
            for kb_mod in aggr_stats:
                te = kb_mod['total_exec_time']
                tq = kb_mod['total_queue_time']
                kb_mod['total_exec_time'] = te / 3600
                kb_mod['total_queue_time'] = tq / 3600
            log(pformat(aggr_stats[0]))
        return aggr_stats

    def get_module_stats_from_cat(self):
        """ get_module_stats_from_cat: Get stats on Modules """
        # Pull the data
        log("Fetching the module stats data from Catalog API...")
        now = time.time()
        kb_modules = dict()
        for kb_module in self.cat_client.list_basic_module_info(
                {'include_unreleased': True}):
            name = kb_module['module_name']
            v = self.cat_client.get_module_info({'module_name': name})['beta']
            vers = self.cat_client.list_released_module_versions(
                {'module_name': name})
            # s marks the version used: 'r' = released, 'b' = beta only.
            s = 'b'
            if len(vers) > 0:
                v = vers[0]
                s = 'r'
            if v is None:
                continue
            ct = len(v['narrative_methods'])
            days = (v['timestamp'] / 1000) / 3600 / 24
            # print '%-40s %3d %3d' %(kb_module['module_name'],days,ct)
            kb_modules['%s:%d:%s' % (name, ct, s)] = days
        # log(pformat(kb_modules))
        # Generate time based summaries
        sorted_x = sorted(kb_modules, key=lambda i: int(kb_modules[i]))
        mods = dict()
        apps = dict()
        rmods = dict()
        rapps = dict()
        # Buckets are ~quarters (91.25-day periods since the epoch).
        for bucket in range(184, 300):
            mods[bucket] = 0
            apps[bucket] = 0
            rmods[bucket] = 0
            rapps[bucket] = 0
        for m in sorted_x:
            (name, ct, s) = m.split(':')
            d = kb_modules[m]
            bucket = int(d / 91.25)
            if bucket not in mods:
                mods[bucket] = 0
                apps[bucket] = 0
                rmods[bucket] = 0
                rapps[bucket] = 0
            mods[bucket] += 1
            apps[bucket] += int(ct)
            if s == 'r':
                rmods[bucket] += 1
                rapps[bucket] += int(ct)
            # print '%-40s %3d %3d' %(name,int(ct),kb_modules[m])
        # Modules by Quarter
        tmods = 0
        tapps = 0
        trmods = 0
        trapps = 0
        Q = 1
        Y = 16
        labels = dict()
        bucket = 184
        for year in range(16, 21):
            for quarter in range(1, 5):
                labels[bucket] = 'Q%d-%2d' % (quarter, year)
                bucket += 1
        # Print cumulative totals per quarter bucket.
        for b in range(184, 191):
            tmods += mods[b]
            tapps += apps[b]
            trmods += rmods[b]
            trapps += rapps[b]
            print '%5s %3d %3d %3d %3d %3d %3d' % (
                labels[b], tmods, tapps, trmods, trapps,
                tmods - trmods, tapps - trapps)
        return kb_modules

    def group_by_user(self, job_sts, user_ids):
        # Group job states by user id; with no user filter, return a single
        # 'all_users' group instead of a list.
        grouped_ujs = []
        if user_ids == []:
            return {'user_id': 'all_users', 'job_states': job_sts}
        for uid in user_ids:
            ujs_by_user = []
            for ujs_i in job_sts:
                if uid == ujs_i['user_id']:
                    ujs_by_user.append(ujs_i)
            if len(ujs_by_user) > 0:
                grouped_ujs.append({'user_id': uid, 'job_states': ujs_by_user})
        return grouped_ujs

    def filterUJS_by_time_stage(self, job_sts, j_stage, j_start_time,
                                j_end_time):
        filtered_ujs = []
        for ujs_i in job_sts:
            # creation_time may be epoch millis (int) or a UTC string.
            if isinstance(ujs_i['creation_time'], int):
                cr_time = datetime.datetime.utcfromtimestamp(
                    ujs_i['creation_time'] / 1000)
            else:
                cr_time = _datetime_from_utc(ujs_i['creation_time'])
#log("Comparing {} between {} and {}".format(str(cr_time), str(j_start_time), str(j_end_time))) if (cr_time <= j_end_time and cr_time >= j_start_time): if (j_stage == 'all' or j_stage == ujs_i['stage']): filtered_ujs.append(ujs_i) return filtered_ujs def convert_time_info(self, ujs_arr): # convert time_info from [utc_string, utc_string, utc_string] to [epoch_timestamp*3] for u_j_s in ujs_arr: if u_j_s['time_info']: # log("Before {}".format(pformat(u_j_s['time_info']))) u_j_s['time_info'] = [ _timestamp_from_utc(t_j) if t_j else None for t_j in u_j_s['time_info'] ] # log("After {}".format(pformat(u_j_s['time_info']))) return ujs_arr def init_clients_withToken(self, token): token = token if token else os.environ['KB_AUTH_TOKEN'] self.ws_client = Workspace(self.workspace_url, token=token) self.cat_client = Catalog(self.catalog_url, auth_svc=self.auth_service_url, token=token) self.njs_client = NarrativeJobService(self.njsw_url, auth_svc=self.auth_service_url, token=token) self.ujs_client = UserAndJobState(self.job_service_url, auth_svc=self.auth_service_url, token=token) self.uprf_client = UserProfile(self.user_profile_url, auth_svc=self.auth_service_url, token=token) def process_app_parameters(self, params): if params.get('user_ids', None) is None: params['user_ids'] = [] else: if not isinstance(params['user_ids'], list): raise ValueError('Variable user_ids' + ' must be a list.') if not params.get('time_range', None) is None: time_start, time_end = params['time_range'] params['time_start'] = _convert_to_datetime(time_start) params['time_end'] = _convert_to_datetime(time_end) else: # set the most recent 48 hours range params['time_end'] = datetime.datetime.utcnow() params['time_start'] = params['time_end'] - datetime.timedelta( hours=48) if params.get('job_stage', None) is None: params['job_stage'] = 'all' if params['job_stage'] == 'completed': params['job_stage'] = 'complete' return params def process_user_parameters(self, params): if params.get('filter_str', None) 
is None: params['filter_str'] = '' else: if not isinstance(params['filter_str'], str): raise ValueError('Variable filter_str' + ' must be a string.') if not params.get('time_range', None) is None: time_start, time_end = params['time_range'] params['time_start'] = _convert_to_datetime(time_start) params['time_end'] = _convert_to_datetime(time_end) else: # set the most recent quarter (90 days) params['time_end'] = datetime.datetime.utcnow() params['time_start'] = params['time_end'] - datetime.timedelta( days=90) return params def generate_user_metrics(self, input_params): """ generate_user_metrics: get user data with structure as the following example: [ {'creation_time': '2017-09-04 15:46:56.387000', 'user_data': {u'department': u'Biotechnology and food science', u'organization': u'NTNU'}, 'user_name': {u'realname': u'Vetle Simensen', u'username': u'vetle'}}, {'creation_time': '2017-09-06 21:45:43.251000', 'user_data': {u'department': u'Department of Medicine', u'organization': u'University of Chicago'}, 'user_name': {u'realname': u'\xd6zcan Esen', u'username': u'ozcan'}}, {'creation_time': '2017-08-30 21:47:51.711000', 'user_data': {u'department': u'Plant and Microbial Biology', u'organization': u'University of California-Berkeley'}, 'user_name': {u'realname': u'Daniel Westcott', u'username': u'westcott'}}, ...... 
] """ params = self.process_user_parameters(input_params) user_filter = params['filter_str'] time_start = params['time_start'] time_end = params['time_end'] kb_users = self.get_user_names(user_filter) user_names = [] real_names = [] for u in kb_users: user_names.append(u['username']) real_names.append(u['realname']) kb_uprof = self.get_user_profiles(user_names) total_user_count = len(kb_uprof) log("Before time range filter:{}".format(total_user_count)) if (time_start is not None or time_end is not None): kb_uprof = self.filterUPROF_by_time_stage(kb_uprof, time_start, time_end) period_user_count = len(kb_uprof) log("After time range filter:{}".format(period_user_count)) return {'user_metrics': kb_uprof} def get_user_names(self, filter_str): """ get_user_names: given a filter string, get a list of User of structure as below: typedef structure { username username; realname realname; string thumbnail; } User; """ log("Fetching user name details for {} users\n{}".format( 'the' if filter_str else 'all', 'with id containing ' + filter_str if filter_str else '')) user_names = self.uprf_client.filter_users({'filter': filter_str}) # log(pformat(user_names)) return user_names def get_user_profiles(self, user_ids): """ get_user_profiles: given the user ids, get a list of UserProfile of structure as below: typedef structure { username username; realname realname; string thumbnail; } User; typedef structure { User user; UnspecifiedObject profile; } UserProfile; example returned data: [ {u'profile': {u'metadata': {u'created': u'2017-11-28T02:52:28.492Z', u'createdBy': u'userprofile_ui_service'}, u'preferences': {}, u'synced': {u'gravatarHash': u'81793127ae5301c545a054846941c061'}, u'userdata': {u'department': u'Physics', u'organization': u'University of Illinois at Urbana-Champaign'} }, u'user': {u'realname': u'Karna Gowda', u'username': u'karnagowda'} }, {u'profile': {u'metadata': {u'created': u'2017-11-28T04:06:14.371Z', u'createdBy': u'userprofile_ui_service'}, u'preferences': 
{}, u'synced': {u'gravatarHash': u'370bb047fc197fd60921eaf5d1683acf'}, u'userdata': {u'department': u'Spirit Youth', u'organization': u'WJS Canada'} }, u'user': {u'realname': u'Nicole McMillan', u'username': u'n_mcmillan'} }, ....... ] """ log("Fetching profile info for {} users:\n".format( len(user_ids) if user_ids else 'all')) user_prof = self.uprf_client.get_user_profile(user_ids) log(pformat(user_prof)) return user_prof def filterUPROF_by_time_stage(self, user_prof, j_start_time, j_end_time): """ example input data for user_prof: [ {u'profile': {u'metadata': {u'created': u'2017-11-28T02:52:28.492Z', u'createdBy': u'userprofile_ui_service'}, u'preferences': {}, u'synced': {u'gravatarHash': u'81793127ae5301c545a054846941c061'}, u'userdata': {u'department': u'Physics', u'organization': u'University of Illinois at Urbana-Champaign'} }, u'user': {u'realname': u'Karna Gowda', u'username': u'karnagowda'} }, {u'profile': {u'metadata': {u'created': u'2017-11-28T04:06:14.371Z', u'createdBy': u'userprofile_ui_service'}, u'preferences': {}, u'synced': {u'gravatarHash': u'370bb047fc197fd60921eaf5d1683acf'}, u'userdata': {u'department': u'Spirit Youth', u'organization': u'WJS Canada'} }, u'user': {u'realname': u'Nicole McMillan', u'username': u'n_mcmillan'} }, ....... 
] """ filtered_uprof = [] for u_i in user_prof: u_crt = u_i['profile']['metadata']['created'] if isinstance(u_crt, int): cr_time = datetime.datetime.utcfromtimestamp(u_crt / 1000) else: cr_time = _datetime_from_utc(u_crt) #log("Comparing {} between {} and {}".format(str(cr_time), str(j_start_time), str(j_end_time))) if (cr_time <= j_end_time and cr_time >= j_start_time): filtered_uprof.append({ 'user_name': u_i['user'], 'creation_time': str(cr_time), 'user_data': u_i['profile']['userdata'] }) return filtered_uprof def process_met_parameters(self, params): if params.get('user_ids', None) is None: params['user_ids'] = [] else: if not isinstance(params['user_ids'], list): raise ValueError('Variable user_ids' + ' must be a list.') if 'kbasetest' in params['user_ids']: params['user_ids'].remove('kbasetest') if (not params.get('start_time', None) is None and not params.get('end_time', None) is None): params['start_time'] = _convert_to_datetime(params['start_time']) params['end_time'] = _convert_to_datetime(params['end_time']) params['minTime'] = _unix_time_millis_from_datetime( params['start_time']) params['maxTime'] = _unix_time_millis_from_datetime( params['end_time']) elif (not params.get('start_time', None) is None and params.get('end_time', None) is None): params['start_time'] = _convert_to_datetime(params['start_time']) params['end_time'] = params['start_time'] + datetime.timedelta( hours=48) params['minTime'] = _unix_time_millis_from_datetime( params['start_time']) params['maxTime'] = _unix_time_millis_from_datetime( params['end_time']) elif (params.get('start_time', None) is None and not params.get('end_time', None) is None): params['end_time'] = _convert_to_datetime(params['end_time']) params['start_time'] = params['end_time'] - datetime.timedelta( hours=48) params['minTime'] = _unix_time_millis_from_datetime( params['start_time']) params['maxTime'] = _unix_time_millis_from_datetime( params['end_time']) else: #set the most recent 48 hours range maxTime = 
datetime.datetime.utcnow() minTime = maxTime - datetime.timedelta(hours=48) params['minTime'] = _unix_time_millis_from_datetime(minTime) params['maxTime'] = _unix_time_millis_from_datetime(maxTime) return params