Example #1
    def check_feature_cache(self, ref, token):
        ws_client = Workspace(self.ws_url, token=token)
        info = ws_client.get_object_info_new({"objects": [{"ref": ref}]})[0]
        inner_chsum = info[8]
        index_file = os.path.join(self.genome_index_dir,
                                  inner_chsum + "_ftr.tsv.gz")
        if not os.path.isfile(index_file):
            if self.debug:
                print("    Loading WS object...")
            t1 = time.time()
            incl = [
                x + y for x, y in product([
                    'features/[*]/', 'cdss/[*]/', 'mrnas/[*]/',
                    'non_coding_features/[*]/'
                ], [
                    "id", "type", "function", "functions", "aliases",
                    "location", "ontology_terms"
                ])
            ] + ['ontologies_present']
            genome = self.get_one_genome(
                {"objects": [{
                    "ref": ref,
                    "included": incl
                }]}, token)
            self.save_feature_tsv(genome, inner_chsum)
            if self.debug:
                print("    (time=" + str(time.time() - t1) + ")")
        return inner_chsum
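
    # For reference, the `incl` list above expands (via itertools.product) to the
    # 28 combinations of the four sub-object prefixes and seven field names, e.g.
    # 'features/[*]/id', 'features/[*]/type', ..., 'non_coding_features/[*]/ontology_terms',
    # plus 'ontologies_present', so only these fields are fetched from the Workspace
    # instead of the full (potentially very large) Genome object.
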
    def _get_ws_info(self, obj_ref):

        ws = Workspace(self.ws_url)
        try:
            info = ws.get_object_info_new({'objects': [{'ref': obj_ref}]})[0]
        except WorkspaceError as wse:
            self.__LOGGER.error('Logging workspace exception')
            self.__LOGGER.error(str(wse))
            raise
        return info
class NarrativeManager:

    KB_CELL = 'kb-cell'
    KB_TYPE = 'type'
    KB_APP_CELL = 'kb_app'
    KB_FUNCTION_CELL = 'function_input'
    KB_OUTPUT_CELL = 'function_output'
    KB_ERROR_CELL = 'kb_error'
    KB_CODE_CELL = 'kb_code'
    KB_STATE = 'widget_state'

    DEBUG = False

    DATA_PALETTES_TYPES = DataPaletteTypes(False)

    def __init__(self, config, ctx, set_api_cache, dps_cache):
        self.narrativeMethodStoreURL = config['narrative-method-store']
        self.set_api_cache = set_api_cache  # DynamicServiceCache type
        self.dps_cache = dps_cache  # DynamicServiceCache type
        self.token = ctx["token"]
        self.user_id = ctx["user_id"]
        self.ws = Workspace(config['workspace-url'], token=self.token)
        self.intro_md_file = config['intro-markdown-file']
        # We switch data palettes on only for the internal Continuous Integration environment for now:
        if config['kbase-endpoint'].startswith("https://ci.kbase.us/"):
            self.DATA_PALETTES_TYPES = DataPaletteTypes(True)

    def list_objects_with_sets(self,
                               ws_id=None,
                               ws_name=None,
                               workspaces=None,
                               types=None,
                               include_metadata=0):
        if not workspaces:
            if (not ws_id) and (not ws_name):
                raise ValueError(
                    "Exactly one of the 'ws_id', 'ws_name', or 'workspaces' " +
                    "parameters must be set")
            workspaces = [self._get_workspace_name_or_id(ws_id, ws_name)]
        return self._list_objects_with_sets(workspaces, types,
                                            include_metadata)

    def _list_objects_with_sets(self, workspaces, types, include_metadata):
        type_map = None
        if types is not None:
            type_map = {key: True for key in types}

        processed_refs = {}
        data = []
        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: processing sets")
        t1 = time.time()
        set_ret = self.set_api_cache.call_method(
            "list_sets", [{
                'workspaces': workspaces,
                'include_set_item_info': 1,
                'include_raw_data_palettes': 1,
                'include_metadata': include_metadata
            }], self.token)
        sets = set_ret['sets']
        dp_data = set_ret.get('raw_data_palettes')
        dp_refs = set_ret.get('raw_data_palette_refs')
        for set_info in sets:
            # Process
            target_set_items = []
            for set_item in set_info['items']:
                target_set_items.append(set_item['info'])
            if self._check_info_type(set_info['info'], type_map):
                data_item = {
                    'object_info': set_info['info'],
                    'set_items': {
                        'set_items_info': target_set_items
                    }
                }
                data.append(data_item)
                processed_refs[set_info['ref']] = data_item
        if self.DEBUG:
            print("    (time=" + str(time.time() - t1) + ")")

        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: loading ws_info")
        t2 = time.time()
        ws_info_list = []
        if len(workspaces) == 1:
            ws = workspaces[0]
            ws_id = None
            ws_name = None
            if str(ws).isdigit():
                ws_id = int(ws)
            else:
                ws_name = str(ws)
            ws_info_list.append(
                self.ws.get_workspace_info({
                    "id": ws_id,
                    "workspace": ws_name
                }))
        else:
            ws_map = {key: True for key in workspaces}
            for ws_info in self.ws.list_workspace_info({'perm': 'r'}):
                if ws_info[1] in ws_map or str(ws_info[0]) in ws_map:
                    ws_info_list.append(ws_info)
        if self.DEBUG:
            print("    (time=" + str(time.time() - t2) + ")")

        if self.DEBUG:
            print(
                "NarrativeManager._list_objects_with_sets: loading workspace objects"
            )
        t3 = time.time()
        for info in WorkspaceListObjectsIterator(
                self.ws,
                ws_info_list=ws_info_list,
                list_objects_params={'includeMetadata': include_metadata}):
            item_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
            if item_ref not in processed_refs and self._check_info_type(
                    info, type_map):
                data_item = {'object_info': info}
                data.append(data_item)
                processed_refs[item_ref] = data_item
        if self.DEBUG:
            print("    (time=" + str(time.time() - t3) + ")")

        if self.DEBUG:
            print(
                "NarrativeManager._list_objects_with_sets: processing DataPalettes"
            )
        t5 = time.time()
        if dp_data is None or dp_refs is None:
            dps = self.dps_cache
            dp_ret = dps.call_method("list_data",
                                     [{
                                         'workspaces': workspaces,
                                         'include_metadata': include_metadata
                                     }], self.token)
            dp_data = dp_ret['data']
            dp_refs = dp_ret['data_palette_refs']
        for item in dp_data:
            ref = item['ref']
            if self._check_info_type(item['info'], type_map):
                data_item = None
                if ref in processed_refs:
                    data_item = processed_refs[ref]
                else:
                    data_item = {'object_info': item['info']}
                    processed_refs[ref] = data_item
                    data.append(data_item)
                dp_info = {}
                if 'dp_ref' in item:
                    dp_info['ref'] = item['dp_ref']
                if 'dp_refs' in item:
                    dp_info['refs'] = item['dp_refs']
                data_item['dp_info'] = dp_info
        if self.DEBUG:
            print("    (time=" + str(time.time() - t5) + ")")
        return {"data": data, 'data_palette_refs': dp_refs}

    def _check_info_type(self, info, type_map):
        if type_map is None:
            return True
        obj_type = info[2].split('-')[0]
        return type_map.get(obj_type, False)
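
    # Example of the filtering above: an object whose type string (info[2]) is
    # 'KBaseGenomes.Genome-8.2' has its version suffix stripped, so a type_map of
    # {'KBaseGenomes.Genome': True} matches it, while type_map=None matches any type.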

    def copy_narrative(self, newName, workspaceRef, workspaceId):
        time_ms = int(round(time.time() * 1000))
        newWsName = self.user_id + ':narrative_' + str(time_ms)
        # add the 'narrative' field to newWsMeta later.
        newWsMeta = {"is_temporary": "false", "narrative_nice_name": newName}

        # start with getting the existing narrative object.
        currentNarrative = self.ws.get_objects([{'ref': workspaceRef}])[0]
        if not workspaceId:
            workspaceId = currentNarrative['info'][6]
        # Prepare the exclusion list for cloning the workspace.
        # 1) currentNarrative object:
        excluded_list = [{'objid': currentNarrative['info'][0]}]
        # 2) let's exclude objects of types under DataPalette handling:
        data_palette_type = "DataPalette.DataPalette"
        excluded_types = [data_palette_type]
        excluded_types.extend(self.DATA_PALETTES_TYPES.keys())
        add_to_palette_list = []
        dp_detected = False
        for obj_type in excluded_types:
            list_objects_params = {'type': obj_type}
            if obj_type == data_palette_type:
                list_objects_params['showHidden'] = 1
            for info in WorkspaceListObjectsIterator(
                    self.ws,
                    ws_id=workspaceId,
                    list_objects_params=list_objects_params):
                if obj_type == data_palette_type:
                    dp_detected = True
                else:
                    add_to_palette_list.append({
                        'ref':
                        str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
                    })
                excluded_list.append({'objid': info[0]})
        # clone the workspace EXCEPT for the currentNarrative object + objects of DataPalette types:
        newWsId = self.ws.clone_workspace({
            'wsi': {
                'id': workspaceId
            },
            'workspace': newWsName,
            'meta': newWsMeta,
            'exclude': excluded_list
        })[0]
        try:
            if dp_detected:
                self.dps_cache.call_method(
                    "copy_palette", [{
                        'from_workspace': str(workspaceId),
                        'to_workspace': str(newWsId)
                    }], self.token)
            if len(add_to_palette_list) > 0:
                # There are objects in the source workspace whose types are under
                # DataPalette handling but which are physically stored in the source
                # workspace rather than saved in the DataPalette object, so they
                # weren't copied by "dps.copy_palette".
                self.dps_cache.call_method("add_to_palette",
                                           [{
                                               'workspace': str(newWsId),
                                               'new_refs': add_to_palette_list
                                           }], self.token)

            # update the ref inside the narrative object and the new workspace metadata.
            newNarMetadata = currentNarrative['info'][10]
            newNarMetadata['name'] = newName
            newNarMetadata['ws_name'] = newWsName
            newNarMetadata['job_info'] = json.dumps({
                'queue_time': 0,
                'running': 0,
                'completed': 0,
                'run_time': 0,
                'error': 0
            })

            currentNarrative['data']['metadata']['name'] = newName
            currentNarrative['data']['metadata']['ws_name'] = newWsName
            currentNarrative['data']['metadata']['job_ids'] = {
                'apps': [],
                'methods': [],
                'job_usage': {
                    'queue_time': 0,
                    'run_time': 0
                }
            }
            # save the shiny new Narrative so it's at version 1
            newNarInfo = self.ws.save_objects({
                'id': newWsId,
                'objects': [{
                    'type': currentNarrative['info'][2],
                    'data': currentNarrative['data'],
                    'provenance': currentNarrative['provenance'],
                    'name': currentNarrative['info'][1],
                    'meta': newNarMetadata
                }]
            })
            # now, just update the workspace metadata to point
            # to the new narrative object
            newNarId = newNarInfo[0][0]
            self.ws.alter_workspace_metadata({
                'wsi': {
                    'id': newWsId
                },
                'new': {
                    'narrative': str(newNarId)
                }
            })
            return {'newWsId': newWsId, 'newNarId': newNarId}
        except BaseException:
            # delete the copied workspace so the broken clone is out of the way
            self.ws.delete_workspace({'id': newWsId})
            raise  # re-raise the original exception

    def create_new_narrative(self, app, method, appparam, appData, markdown,
                             copydata, importData, includeIntroCell):
        if app and method:
            raise ValueError(
                "Must provide no more than one of the app or method params")

        if (not importData) and copydata:
            importData = copydata.split(';')

        if (not appData) and appparam:
            appData = []
            for tmp_item in appparam.split(';'):
                tmp_tuple = tmp_item.split(',')
                step_pos = None
                if tmp_tuple[0]:
                    try:
                        step_pos = int(tmp_tuple[0])
                    except ValueError:
                        pass
                appData.append([step_pos, tmp_tuple[1], tmp_tuple[2]])
        cells = None
        if app:
            cells = [{"app": app}]
        elif method:
            cells = [{"method": method}]
        elif markdown:
            cells = [{"markdown": markdown}]
        return self._create_temp_narrative(cells, appData, importData,
                                           includeIntroCell)

    def _get_intro_markdown(self):
        """
        Creates and returns a cell with the introductory text included.
        """
        # Load introductory markdown text
        with open(self.intro_md_file) as intro_file:
            intro_md = intro_file.read()
        return intro_md

    def _create_temp_narrative(self, cells, parameters, importData,
                               includeIntroCell):
        # Migration to python of JavaScript class from https://github.com/kbase/kbase-ui/blob/4d31151d13de0278765a69b2b09f3bcf0e832409/src/client/modules/plugins/narrativemanager/modules/narrativeManager.js#L414
        narr_id = int(round(time.time() * 1000))
        workspaceName = self.user_id + ':narrative_' + str(narr_id)
        narrativeName = "Narrative." + str(narr_id)

        ws = self.ws
        ws_info = ws.create_workspace({
            'workspace': workspaceName,
            'description': ''
        })
        newWorkspaceInfo = ServiceUtils.workspaceInfoToObject(ws_info)
        [narrativeObject, metadataExternal
         ] = self._fetchNarrativeObjects(workspaceName, cells, parameters,
                                         includeIntroCell)
        objectInfo = ws.save_objects({
            'workspace': workspaceName,
            'objects': [{
                'type': 'KBaseNarrative.Narrative',
                'data': narrativeObject,
                'name': narrativeName,
                'meta': metadataExternal,
                'provenance': [{
                    'script': 'NarrativeManager.py',
                    'description': 'Created new Workspace/Narrative bundle.'
                }],
                'hidden': 0
            }]
        })[0]
        objectInfo = ServiceUtils.objectInfoToObject(objectInfo)
        self._completeNewNarrative(newWorkspaceInfo['id'], objectInfo['id'],
                                   importData)
        return {'workspaceInfo': newWorkspaceInfo, 'narrativeInfo': objectInfo}

    def _fetchNarrativeObjects(self, workspaceName, cells, parameters,
                               includeIntroCell):
        if not cells:
            cells = []
        # fetchSpecs
        appSpecIds = []
        methodSpecIds = []
        specMapping = {'apps': {}, 'methods': {}}
        for cell in cells:
            if 'app' in cell:
                appSpecIds.append(cell['app'])
            elif 'method' in cell:
                methodSpecIds.append(cell['method'])
        nms = NarrativeMethodStore(self.narrativeMethodStoreURL,
                                   token=self.token)
        if len(appSpecIds) > 0:
            appSpecs = nms.get_app_spec({'ids': appSpecIds})
            for spec in appSpecs:
                spec_id = spec['info']['id']
                specMapping['apps'][spec_id] = spec
        if len(methodSpecIds) > 0:
            methodSpecs = nms.get_method_spec({'ids': methodSpecIds})
            for spec in methodSpecs:
                spec_id = spec['info']['id']
                specMapping['methods'][spec_id] = spec
        # end of fetchSpecs
        metadata = {
            'job_ids': {
                'methods': [],
                'apps': [],
                'job_usage': {
                    'queue_time': 0,
                    'run_time': 0
                }
            },
            'format': 'ipynb',
            'creator': self.user_id,
            'ws_name': workspaceName,
            'name': 'Untitled',
            'type': 'KBaseNarrative.Narrative',
            'description': '',
            'data_dependencies': []
        }
        cellData = self._gatherCellData(cells, specMapping, parameters,
                                        includeIntroCell)
        narrativeObject = {
            'nbformat_minor': 0,
            'cells': cellData,
            'metadata': metadata,
            'nbformat': 4
        }
        metadataExternal = {}
        for key in metadata:
            value = metadata[key]
            if isinstance(value, str):
                metadataExternal[key] = value
            else:
                metadataExternal[key] = json.dumps(value)
        return [narrativeObject, metadataExternal]

    def _gatherCellData(self, cells, specMapping, parameters,
                        includeIntroCell):
        cell_data = []
        if includeIntroCell == 1:
            cell_data.append({
                'cell_type': 'markdown',
                'source': self._get_intro_markdown(),
                'metadata': {}
            })
        for cell_pos, cell in enumerate(cells):
            if 'app' in cell:
                cell_data.append(
                    self._buildAppCell(len(cell_data),
                                       specMapping['apps'][cell['app']],
                                       parameters))
            elif 'method' in cell:
                cell_data.append(
                    self._buildMethodCell(
                        len(cell_data), specMapping['methods'][cell['method']],
                        parameters))
            elif 'markdown' in cell:
                cell_data.append({
                    'cell_type': 'markdown',
                    'source': cell['markdown'],
                    'metadata': {}
                })
            else:
                raise ValueError("cannot add cell #" + str(cell_pos) +
                                 ", unrecognized cell content")
        return cell_data

    def _buildAppCell(self, pos, spec, params):
        cellId = 'kb-cell-' + str(pos) + '-' + str(uuid.uuid4())
        cell = {
            'cell_type': 'markdown',
            'source': "<div id='" + cellId + "'></div>" +
                      "\n<script>" +
                      "$('#" + cellId + "').kbaseNarrativeAppCell({'appSpec' : '" +
                      self._safeJSONStringify(spec) + "', 'cellId' : '" + cellId +
                      "'});" + "</script>",
            'metadata': {}
        }
        cellInfo = {}
        widgetState = []
        cellInfo[self.KB_TYPE] = self.KB_APP_CELL
        cellInfo['app'] = spec
        if params:
            steps = {}
            for param in params:
                stepid = 'step_' + str(param[0])
                if stepid not in steps:
                    steps[stepid] = {}
                    steps[stepid]['inputState'] = {}
                steps[stepid]['inputState'][param[1]] = param[2]
            state = {
                'state': {
                    'step': steps
                }
            }
            widgetState.append(state)
        cellInfo[self.KB_STATE] = widgetState
        cell['metadata'][self.KB_CELL] = cellInfo
        return cell

    def _buildMethodCell(self, pos, spec, params):
        cellId = 'kb-cell-' + str(pos) + '-' + str(uuid.uuid4())
        cell = {
            'cell_type': 'markdown',
            'source': "<div id='" + cellId + "'></div>" +
                      "\n<script>" +
                      "$('#" + cellId + "').kbaseNarrativeMethodCell({'method' : '" +
                      self._safeJSONStringify(spec) + "'});" + "</script>",
            'metadata': {}
        }
        cellInfo = {'method': spec, 'widget': spec['widgets']['input']}
        cellInfo[self.KB_TYPE] = self.KB_FUNCTION_CELL
        widgetState = []
        if params:
            wparams = {}
            for param in params:
                wparams[param[1]] = param[2]
            widgetState.append({'state': wparams})
        cellInfo[self.KB_STATE] = widgetState
        cell['metadata'][self.KB_CELL] = cellInfo
        return cell

    def _completeNewNarrative(self, workspaceId, objectId, importData):
        self.ws.alter_workspace_metadata({
            'wsi': {
                'id': workspaceId
            },
            'new': {
                'narrative': str(objectId),
                'is_temporary': 'true'
            }
        })
        # copy_to_narrative:
        if not importData:
            return
        objectsToCopy = [{'ref': x} for x in importData]
        infoList = self.ws.get_object_info_new({
            'objects': objectsToCopy,
            'includeMetadata': 0
        })
        for item in infoList:
            objectInfo = ServiceUtils.objectInfoToObject(item)
            self.copy_object(objectInfo['ref'], workspaceId, None, None,
                             objectInfo)

    def _safeJSONStringify(self, obj):
        return json.dumps(self._safeJSONStringifyPrepare(obj))

    def _safeJSONStringifyPrepare(self, obj):
        if isinstance(obj, str):
            return obj.replace("'", "&apos;").replace('"', "&quot;")
        elif isinstance(obj, list):
            for pos in range(len(obj)):
                obj[pos] = self._safeJSONStringifyPrepare(obj[pos])
        elif isinstance(obj, dict):
            obj_keys = list(obj.keys())
            for key in obj_keys:
                obj[key] = self._safeJSONStringifyPrepare(obj[key])
        else:
            pass  # it's boolean/int/float/None
        return obj
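
    # Illustrative behavior of the two helpers above: quotes inside strings are
    # HTML-escaped before JSON serialization so the result can be embedded in the
    # generated <script> markup of the app/method cells, e.g.
    #   _safeJSONStringify({'msg': 'say "hi"'}) == '{"msg": "say &quot;hi&quot;"}'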

    def _get_workspace_name_or_id(self, ws_id, ws_name):
        ret = ws_name
        if not ret:
            ret = str(ws_id)
        return ret

    def copy_object(self, ref, target_ws_id, target_ws_name, target_name,
                    src_info):
        # There should be some logic related to DataPalettes
        if (not target_ws_id) and (not target_ws_name):
            raise ValueError("Neither target workspace ID nor name is defined")
        if not src_info:
            src_info_tuple = self.ws.get_object_info_new({
                'objects': [{'ref': ref}],
                'includeMetadata': 0
            })[0]
            src_info = ServiceUtils.objectInfoToObject(src_info_tuple)
        type_name = src_info['typeModule'] + '.' + src_info['typeName']
        type_config = self.DATA_PALETTES_TYPES.get(type_name)
        if type_config is not None:
            # Copy with DataPaletteService
            if target_name:
                raise ValueError(
                    "'target_name' cannot be defined for DataPalette copy")
            target_ws_name_or_id = self._get_workspace_name_or_id(
                target_ws_id, target_ws_name)
            self.dps_cache.call_method("add_to_palette",
                                       [{
                                           'workspace': target_ws_name_or_id,
                                           'new_refs': [{
                                               'ref': ref
                                           }]
                                       }], self.token)
            return {'info': src_info}
        else:
            if not target_name:
                target_name = src_info['name']
            obj_info_tuple = self.ws.copy_object({
                'from': {
                    'ref': ref
                },
                'to': {
                    'wsid': target_ws_id,
                    'workspace': target_ws_name,
                    'name': target_name
                }
            })
            obj_info = ServiceUtils.objectInfoToObject(obj_info_tuple)
            return {'info': obj_info}

    def list_available_types(self, workspaces):
        data = self.list_objects_with_sets(workspaces=workspaces)['data']
        type_stat = {}
        for item in data:
            info = item['object_info']
            obj_type = info[2].split('-')[0]
            if obj_type in type_stat:
                type_stat[obj_type] += 1
            else:
                type_stat[obj_type] = 1
        return {'type_stat': type_stat}
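

# A minimal usage sketch of NarrativeManager (not part of the original source).
# The config keys and ctx fields match what __init__ reads above; the URLs, the
# token and the two cache objects are hypothetical placeholders.
#
#     config = {
#         'narrative-method-store': 'https://kbase.us/services/narrative_method_store/rpc',
#         'workspace-url': 'https://kbase.us/services/ws',
#         'intro-markdown-file': '/kb/module/data/intro.md',
#         'kbase-endpoint': 'https://kbase.us/services/',
#     }
#     ctx = {'token': '<auth token>', 'user_id': 'someuser'}
#     nm = NarrativeManager(config, ctx, set_api_cache, dps_cache)
#     listing = nm.list_objects_with_sets(ws_name='someuser:narrative_1')
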
Example #4
class Service:
    def __init__(self, fba_url, ws_url, ctx):
        self.ws_client = Workspace(ws_url, token=ctx['token'])
        self.fba_client = fba_tools(fba_url)

    def get_object(self, objid, wsid, name=None):
        """
        Returns an object and its associated KBase information

        Returns an ObjectData (dictionary) like what is returned in the workspace service 'get_objects' function:

        /* The data and supplemental info for an object.

            UnspecifiedObject data - the object's data or subset data.
            object_info info - information about the object.
            list<ProvenanceAction> provenance - the object's provenance.
            username creator - the user that first saved the object to the
                workspace.
            timestamp created - the date the object was first saved to the
                workspace.
            list<obj_ref> refs - the references contained within the object.
            obj_ref copied - the reference of the source object if this object is
                a copy and the copy source exists and is accessible.
                null otherwise.
            boolean copy_source_inaccessible - true if the object was copied from
                another object, but that object is no longer accessible to the
                user. False otherwise.
            mapping<id_type, list<extracted_id>> extracted_ids - any ids extracted
                from the object.
            string handle_error - if an error occurs while setting ACLs on
                embedded handle IDs, it will be reported here.
            string handle_stacktrace - the stacktrace for handle_error.

        */
        typedef structure {
            UnspecifiedObject data;
            object_info info;
            list<ProvenanceAction> provenance;
            username creator;
            timestamp created;
            list<obj_ref> refs;
            obj_ref copied;
            boolean copy_source_inaccessible;
            mapping<id_type, list<extracted_id>> extracted_ids;
            string handle_error;
            string handle_stacktrace;
        } ObjectData;

        :param name: (optional) the name of the object to retrieve; if included, it is favored over objid
        :param wsid: the workspace to retrieve the object from
        :param objid: the id of the object to be retrieved

        """
        if name is None:
            result = self.ws_client.get_objects2(
                {'objects': [{
                    'objid': objid,
                    'workspace': wsid
                }]})['data'][0]
        else:
            result = self.ws_client.get_objects2(
                {'objects': [{
                    'name': name,
                    'workspace': wsid
                }]})['data'][0]
        return result['data'], result['info']

    def get_info(self, wsid, objid=None, name=None):
        if name is None:
            return self.ws_client.get_object_info_new(
                {'objects': [{
                    'objid': objid,
                    'workspace': wsid
                }]})[0]
        else:
            return self.ws_client.get_object_info_new(
                {'objects': [{
                    'name': name,
                    'workspace': wsid
                }]})[0]

    def save_object(self, data, type, wsid, objid=None, name=None):
        """
        Saves an object in KBase

        :param data: data representing the object to be saved
        :param type: a string representing the KBase type of the object
        :param wsid: destination workspace
        :param objid: (optional) ID for location of object to be saved (use with care, overwriting/failures are at KBase's
            discretion).
        :param name: (optional) string name for the object to be saved
        :return: a list of information about the object as it is stored in KBase
        """
        sv = {u'data': data, u'type': type}
        if objid is not None:
            sv[u'objid'] = objid
        if name is not None:
            sv[u'name'] = name
        info = self.ws_client.save_objects({
            u'workspace': wsid,
            u'objects': [sv]
        })[0]
        return info[0], info[7]
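
    # Hypothetical round trip using the two methods above (workspace, type and
    # object names are placeholders): save_object returns (info[0], info[7]),
    # i.e. (object id, workspace name), and get_object returns (data, info).
    #
    #     objid, ws_name = service.save_object(data, 'KBaseFBA.FBAModel',
    #                                          'my_workspace', name='my_model')
    #     data, info = service.get_object(objid, 'my_workspace')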

    def list_objects(self, workspace_id, typestr=None):
        """
        returns a list of all the objects within a workspace as tuples (obj_id, ws_id, object_name, object_type)

        :rtype: list
        :param typestr: (optional) if set, lists only objects of this type (filter over default case)
        :param workspace_id: the workspace to list the objects from
        :return: a list of tuples of objects
        """
        objects = self.ws_client.list_objects({'workspaces': [workspace_id]})
        result = list()
        for obj in objects:
            object_type = obj[2]
            if typestr is None or typestr in object_type \
                    or types()[typestr] in object_type:  # type filtering of our list
                result.append((obj[0], obj[6], obj[1], obj[2]))
        return result

    def clear_workspace(self, workspace_id):
        """
        clear all objects in a workspace (except for a Narrative object if applicable)
        :param workspace_id: workspace to clear
        :return: None
        """
        object_ids = [{
            'objid': info[0],
            'wsid': workspace_id
        } for info in self.ws_client.list_objects({'ids': [workspace_id]})
                      if not info[2].startswith('KBaseNarrative')]
        if len(object_ids) > 0:
            self.ws_client.delete_objects(object_ids)

    def delete_objects(self, object_tuples):
        """
        delete objects
        :param object_tuples: list of tuples representing objects to delete of the form (obj_id, ws_id)
        :return: None
        """
        object_ids = [{
            'objid': info[0],
            'wsid': info[1]
        } for info in object_tuples]
        if len(object_ids) > 0:
            self.ws_client.delete_objects(object_ids)

    def copy_object(self, from_tuple, to_tuple):
        """
        Copies an object in the service to another location in the service

        :param from_tuple: (objid, wsid) of the object to be copied
        :param to_tuple: (name, wsid) of the destination. workspace may differ. NOTE NAME IS A STRING
        :return: a tuple with information on the new object
        """
        info = self.ws_client.copy_object({
            'from': {
                'workspace': from_tuple[1],
                'objid': from_tuple[0]
            },
            'to': {
                'workspace': to_tuple[1],
                'name': to_tuple[0]
            }
        })
        return info[0], info[7]

    def gapfill_model(self, model, media, workspace=None):
        """

        :param model: FBAModel to gapfill
        :param media: Media to gapfill the model to
        :param workspace: (optional) destination workspace for the new model and gapfill object,
            default is model.workspace_id. KBase will overwrite the original model if left unspecified.
        :return: tuple identity (object_id, workspace_id) of the gap-filled model
        """
        if workspace is None:
            workspace = model.workspace_id
        params = {
            u'fbamodel_id': str(model.object_id),
            u'fbamodel_workspace': str(model.workspace_id),
            u'fbamodel_output_id': str(model.name),
            u'workspace': workspace,
            u'media_id': media.object_id,
            u'media_workspace': media.workspace_id,
            u'comprehensive_gapfill': False
        }
        self.fba_client.gapfill_metabolic_model(params)
        return model.object_id, model.workspace_id

    def _gapfill_solution(self, fba):
        """
            If this FBA was created as a gapfilling solution, then this returns a list of reactions to be added/adjusted
            :return: list(tuple) (rxn_id, direction, etc.)
            """
        # For now, naively assume first = best = only gap-filling solution
        solutions = fba['gapfillingSolutions']
        if len(solutions) < 1:
            raise ValueError("This is not a gapfilling solution")
        gsol = solutions[0]['gapfillingSolutionReactions']
        result = []
        for r in gsol:
            reaction_id = r['reaction_ref'].split('/')[-1] + '_' + \
                          r['compartment_ref'].split('/')[-1] + str(r['compartmentIndex'])
            direction = r['direction']
            result.append((reaction_id, direction))
        return result
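
    # Shape of one gapfilling-solution reaction and the tuple produced above
    # (values are illustrative):
    #     r = {'reaction_ref': '.../rxn00533', 'compartment_ref': '.../c',
    #          'compartmentIndex': 0, 'direction': '>'}
    #     -> ('rxn00533_c0', '>')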

    def fba_formulation(self, media):
        return {
            u'media': str(media.object_id),
            u'media_workspace': str(media.workspace_id)
        }

    def runfba(self, model, media, workspace=None):
        """
        runs Flux Balance Analysis on an FBAModel in the fba modeling service

        :param model: FBAModel to run flux balance analysis on
        :param media: Media to run FBA with
        :param workspace: (optional) workspace for the FBA object to be left in, default is model workspace
        :return: tuple identity of the FBA stored in the service
        """
        if workspace is None:
            workspace = model.workspace_id
        fba_params = {
            u'workspace': workspace,
            u'fbamodel_id': model.object_id,
            u'fbamodel_workspace': model.workspace_id,
            u'media_workspace': str(media.workspace_id),
            u'media_id': str(media.object_id),
            u'fba_output_id': model.name + '_fba'
        }
        info = self.fba_client.run_flux_balance_analysis(fba_params)
        obj_id = info['new_fba_ref'].split('/')[1]
        return obj_id, workspace

    def runfva(self, model, media, workspace=None):
        """
        runs Flux Variability Analysis (FVA) on an FBAModel in the fba modeling service

        :param model: FBAModel to run flux variability analysis on
        :param media: Media to run FVA with
        :param workspace: (optional) workspace for the FBA object to be left in, default is model workspace
        :return: tuple identity of the FBA stored in the service
        """
        if workspace is None:
            workspace = model.workspace_id
        fba_params = {
            u'workspace': workspace,
            u'model': model.object_id,
            u'model_workspace': model.workspace_id,
            u'formulation': self.fba_formulation(media),
            u'fva': True
        }
        info = self.fba_client.runfba(fba_params)
        obj_id = info['new_fba_ref'].split('/')[1]
        return obj_id, workspace

    def translate_model(self, src_model, protcomp, workspace=None):
        """
        Uses the service to translate an FBAModel to a close genome relative
        :param protcomp: ProteomeComparison with source and target Genome
        :param src_model: FBAModel of source
        :param workspace: (optional) destination workspace, default is src_model.workspace_id
        :return: tuple identity of the translated model stored in the service
        """
        if workspace is None:
            workspace = src_model.workspace_id
        trans_params = {
            u'keep_nogene_rxn': 1,
            u'proteincomparison_id': protcomp.object_id,
            u'proteincomparison_workspace': protcomp.workspace_id,
            u'fbamodel_id': src_model.object_id,
            u'fbamodel_output_id': 'translated_' + src_model.name,
            u'fbamodel_workspace': src_model.workspace_id,
            u'workspace': workspace
        }
        info = self.fba_client.propagate_model_to_new_genome(trans_params)
        obj_id = info['new_fbamodel_ref'].split('/')[1]
        return obj_id, workspace

    def reconstruct_genome(self, genome, workspace=None):
        """
        Reconstructs a genome and returns the identity of a stored draft recon model (FBAModel)
        :param workspace: (optional) destination workspace. Default is genome.workspace_id
        :param genome: Genome to draft a reconstruction for
        :return: tuple identity of the draft model stored in the service (FBAModel)
        """
        if workspace is None:
            workspace = genome.workspace_id
        recon_params = {
            u'genome_id': genome.object_id,
            u'genome_workspace': genome.workspace_id,
            u'fbamodel_output_id': 'recon_' + genome.name,
            u'gapfill_model': False,  # TODO parameterize as option
            u'workspace': workspace
        }
        info = self.fba_client.build_metabolic_model(recon_params)
        # references returned here are sometimes inconsistent with other fba_tools APIs, so fetch object info from the ws service
        obj_name = info['new_fbamodel_ref'].split('/')[1]
        try:
            return int(obj_name), workspace
        except ValueError:
            ws_object_info = self.ws_client.get_object_info_new(
                {'objects': [{
                    'name': obj_name,
                    'workspace': workspace
                }]})[0]
            return ws_object_info[0], workspace

    def remove_reactions_in_place(self, model, reactions_to_remove):
        """
        Removes reactions from an FBAModel IN PLACE (changes object as it is stored)

        Recommended to make a copy first

        :param model: FBAModel to remove reactions from
        :param reactions_to_remove: reactions to remove (removal_id's)
        :return:
        """
        model_data, model_info = self.get_object(model.object_id,
                                                 model.workspace_id)
        rxns_to_remove = set(reactions_to_remove)
        prior_ids = set([r['id'] for r in model_data['modelreactions']])
        model_data['modelreactions'] = [
            r for r in model_data['modelreactions']
            if r['id'] not in rxns_to_remove
        ]
        current_ids = set([r['id'] for r in model_data['modelreactions']])
        removed = set(
            [rxn_id for rxn_id in prior_ids if rxn_id not in current_ids])
        if len(reactions_to_remove) != len(removed):
            print("WARNING: expected to remove", len(reactions_to_remove),
                  "reactions but only removed", len(removed))
            print("Failed to remove", set(reactions_to_remove) - removed)
            print("Full arg reactions_to_remove:", ', '.join(reactions_to_remove))
        return self.save_object(model_data,
                                model_info[2],
                                model.workspace_id,
                                name=model.name)

    def remove_reaction(self, model, reaction, output_id=None, in_place=False):
        """

        :param model: FBAModel to remove the reaction from
        :param reaction: removal_id (str) of the reaction to remove
        :param output_id: (optional) (str) of the new name for the output model
        :param in_place: (optional) set to true if you want to remove the reaction from the model in place instead of making
            a new model. Will disregard output_id argument if set to true
        :return: info tuple for the new FBAModel in the stored environment
        """

        if in_place:
            return self.remove_reactions_in_place(model, [reaction])
        if output_id is None:
            i = 0
            output_id = model.name + '-' + str(i)
            names = set(
                [info[3] for info in self.list_objects(model.workspace_id)])
            while output_id in names:
                i += 1
                output_id = model.name + '-' + str(i)

        model_data, model_info = self.get_object(model.object_id,
                                                 model.workspace_id)
        # drop the reaction from the model data, then save
        model_data['modelreactions'] = [
            r for r in model_data['modelreactions'] if r['id'] != reaction
        ]
        return self.save_object(model_data,
                                model_info[2],
                                model.workspace_id,
                                name=output_id)

    def add_reactions(self, model, new_reactions, workspace=None, name=None):
        """
        adds reactions to an FBAModel, in place or with a copy (set name to a new name)
        :param model: FBAModel to add reactions to
        :param new_reactions: list of tuples of the form (rxn_id, rxn_comp, direction, gpr) (gpr is optional)
        :param workspace: (optional) destination workspace, default is model.workspace_id
        :param name: output name for the new model. use to make a new one or modify in place
        :return: tuple identity of the model stored in the service (FBAModel)
        """
        reactions_to_add = [{
            'add_reaction_id': r[0],
            'reaction_compartment_id': [r[1]] if len(r) > 1 else [],
            'add_reaction_name': r[0],
            'add_reaction_direction': r[2] if len(r) > 2 else '=',
            'add_reaction_gpr': r[3] if len(r) > 3 else '',
        } for r in new_reactions]
        add_rxn_args = {
            'fbamodel_id': model.object_id,
            'fbamodel_workspace': model.workspace_id,
            'fbamodel_output_id': name or model.name,
            'workspace': workspace or model.workspace_id,
            'reactions_to_add': reactions_to_add
        }
        info = self.fba_client.edit_metabolic_model(add_rxn_args)
        return self._parse_objid_from_ref(
            info['new_fbamodel_ref']), workspace or model.workspace_id
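
    # Expected shape of `new_reactions` above, per the docstring (ids, direction
    # and gpr values are illustrative):
    #     [('rxn00001', 'c0', '>', 'gene1 or gene2'),  # full tuple
    #      ('rxn00002', 'c0'),                         # direction defaults to '='
    #      ('rxn00003',)]                              # compartment defaults to []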

    def add_reactions_manually(self,
                               model,
                               reactions,
                               workspace=None,
                               name=None):
        """
        Manually fix special reactions within the object itself (use with caution)
        :param name: what to name the model when it is saved
        :param workspace: workspace to save the new FBAModel in
        :param reactions: (list<ModelReaction>) list of reactions to add manually
        :param model: FBAModel to add the reactions to
        """
        model.get_object()
        if workspace is None:
            workspace = model.workspace_id
        obj = model.data
        cpds = dict([(c['id'], c) for c in obj['modelcompounds']])
        for r in reactions:
            obj['modelreactions'].append(r.data)
            for cpd in r.data['modelReactionReagents']:
                c = cpd['modelcompound_ref'].split('/')[-1]
                if c not in cpds:
                    compound = {
                        'id': c,
                        'name': c,
                        'aliases': [u'mdlid:' + c.split('_')[0]],
                        'charge': 0,
                        'compound_ref': '489/6/6/compounds/id/cpd00000',
                        'modelcompartment_ref': '~/modelcompartments/id/' + c.split('_')[-1],
                        'formula': ''
                    }
                    obj['modelcompounds'].append(compound)
                    cpds[c] = compound  # register the new compound without rebuilding the index
        if name is not None:
            return self.save_object(obj,
                                    types()['FBAModel'],
                                    workspace,
                                    name=name)
        return self.save_object(obj,
                                types()['FBAModel'],
                                workspace,
                                objid=model.object_id)

    def adjust_directions_and_gprs(self, model, adjustments):
        reactions_to_change = [{
            'change_reaction_id': [r[0]],
            'change_reaction_direction': str(r[1]),
            'change_reaction_gpr': str(r[2])[1:-1],
        } for r in adjustments]
        change_rxn_args = {
            'fbamodel_id': model.object_id,
            'fbamodel_workspace': model.workspace_id,
            'fbamodel_output_id': model.name,
            'workspace': model.workspace_id,
            'reactions_to_change': reactions_to_change
        }
        self.fba_client.edit_metabolic_model(change_rxn_args)

    def adjust_directions(self, model, adjustments):
        """
        adjusts directions for reactions in an FBAModel
        :param model: FBAModel to adjust directions for
        :param adjustments: list<tuple> (rxn_id, direction). if rxn_id is not already in the model, it may be added
        :return: None
        """
        adjust_args = {
            'model': model.object_id,
            'workspace': model.workspace_id,
            'reaction': [r[0] for r in adjustments],
            'direction': [str(r[1]) for r in adjustments]
        }
        self.fba_client.adjust_model_reaction(adjust_args)

    def _integrate_gapfill(self, model, solution_fba, workspace=None):
        changes = self._gapfill_solution(solution_fba)
        reactions = dict([(r.rxn_id(), r) for r in model.get_reactions()])
        dirs = []
        additions = []
        for r in changes:
            if r[0] in reactions:
                dirs.append((reactions[r[0]].get_removal_id(), r[1]))
            else:
                temp = r[0].split('_')
                rxn_id = temp[0]
                rxn_comp = temp[1]
                additions.append((rxn_id, rxn_comp, r[1]))
        self.adjust_directions(model, dirs)
        info = self.add_reactions(model, additions, workspace=workspace)
        return info

    def model_info(self, model):
        comp = self.fba_client.compare_models({
            'models': [model.object_id],
            'workspaces': [model.workspace_id]
        })
        return (comp['model_comparisons'],
                dict([(r['reaction'], r)
                      for r in comp['reaction_comparisons']]))

    def init_workspace(self, ws=None, name=None):
        ws_id = ws
        ws_name = name
        if ws_name is None:
            ws_name = 'MMws'
        if ws is None:
            ws_conflict = True
            while ws_conflict:
                create_ws_params = {
                    'workspace': ws_name,
                    'globalread': 'r',
                    'description': "A workspace for storing the FBAs and metadata of the algorithm"
                }
                # Try to create a workspace, catch an error if the name is already in use
                try:
                    new_ws = self.ws_client.create_workspace(create_ws_params)
                    # new_ws is type workspace_info, a tuple where 0, 1 are id, name
                    ws_id = new_ws[0]
                    ws_name = new_ws[1]
                    ws_conflict = False
                except ServerError:
                    ws_name += str(random.randint(1, 9))
        return ws_id, ws_name

    def _parse_objid_from_ref(self, ref):
        return ref.split('/')[1]
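

# A minimal usage sketch of Service (not part of the original source): the URLs
# and the token are hypothetical placeholders.
if __name__ == '__main__':
    ctx = {'token': '<auth token>'}
    svc = Service('https://kbase.us/services/fba_tools',  # hypothetical fba_tools URL
                  'https://kbase.us/services/ws',         # hypothetical workspace URL
                  ctx)
    # create (or reuse) a scratch workspace, then list its contents
    ws_id, ws_name = svc.init_workspace(name='MMws')
    print(svc.list_objects(ws_name))
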
Example #5
class DataPalette:

    PROVENANCE = [{'service': 'DataPaletteService'}]
    DATA_PALETTE_WS_METADATA_KEY = 'data_palette_id'
    DEFAULT_PALETTE_OBJ_NAME = 'data_palette'
    PALETTE_OBJ_WS_TYPE = 'DataPalette.DataPalette'

    # set of types that cannot be added to a data palette (TODO: move to configuration)
    PROHIBITED_DATA_TYPES = ['KBaseReport.Report',
                             'KBaseNarrative.Narrative',
                             'DataPalette.DataPalette']

    def __init__(self, ws_name_or_id, ws_url=None, token=None, ws_info=None, ws=None):
        if ws:
            self.ws = ws
        else:
            if ws_url is None:
                raise ValueError('ws_url was not defined')
            if token is None:
                print('DataPalette warning: token was not set')
            self.ws = Workspace(ws_url, token=token)

        if ws_info:
            if ws_name_or_id:
                raise ValueError("Either ws_name_or_id or ws_info should be set")
            self.ws_info = WorkspaceInfo(ws_info)
        else:
            if str(ws_name_or_id).isdigit():
                self.ws_info = WorkspaceInfo(self.ws.get_workspace_info({'id': int(ws_name_or_id)}))
            else:
                self.ws_info = WorkspaceInfo(self.ws.get_workspace_info({
                                                                    'workspace': str(ws_name_or_id)
                                                                    }))

        self.palette_ref = None

    def list(self, options):
        # if there is no data palette, return nothing
        dp_ref = self._get_root_data_palette_ref()
        if dp_ref is None:
            return []

        palette = self._get_data_palette()
        include_metadata = options.get('include_metadata', 0)
        palette = self._attach_palette_data_info(palette, include_metadata)

        return palette['data']

    def add(self, refs=None):
        '''
        Adds the provided references to the data palette.
        '''
        if len(refs) == 0:
            return {}

        # make sure the references to add are visible and valid
        objs = self._get_object_info(refs)
        self._validate_objects_to_add(objs)

        # get the existing palette and build an index
        palette = self._get_data_palette()
        data_index = self._build_palette_data_index(palette['data'])
        
        # changing refs in DataPalette so that they are pointed through
        # DataPalette object ref as ref-path
        self._extend_ref_paths_before_saving(palette)

        # perform the actual palette update
        for obj_pos in range(0, len(objs)):
            o = objs[obj_pos]
            ws = str(o[6])
            obj = str(o[0])
            ver = str(o[4])
            ref = refs[obj_pos]['ref']  # full absolute ref: ws + '/' + obj + '/' + ver

            if ws + '/' + obj in data_index:
                # the object is in the palette, so check versions
                index = data_index[ws + '/' + obj]
                if index['ver'] == ver:
                    # the version didn't change, so continue
                    continue
                # the version is different, so update it
                data_index[ws + '/' + obj]['ver'] = ver
                palette['data'][index['idx']]['ref'] = ref

            else:
                # the object wasn't in the palette, so add it
                idx = len(palette['data'])
                palette['data'].append({'ref': ref})
                data_index[ws + '/' + obj] = {'ver': ver, 'idx': idx}

        # save the updated palette and return
        self._save_data_palette(palette)
        return {}

    def remove(self, refs=None):
        dp_ref = self._get_root_data_palette_ref()
        if dp_ref is None:
            raise ValueError('Cannot remove from data palette - the data ' +
                             'palette for this Workspace does not exist')

        if len(refs) == 0:
            return {}

        # right now, we only match on exact refs, so this works
        palette = self._get_data_palette()
        data_index = self._build_palette_data_index(palette['data'])

        index_to_delete = []
        for r in range(0, len(refs)):
            ref = refs[r]['ref']
            tokens = ref.split('/')
            if len(tokens) != 3 or not all(t.isdigit() for t in tokens):
                raise ValueError('Invalid absolute reference: ' + str(ref) + ' at position ' +
                                 str(r) + ' of removal list. References must be full, ' +
                                 'absolute numerical WS refs.')
            ws_slash_id = tokens[0] + '/' + tokens[1]
            if ws_slash_id in data_index:
                if data_index[ws_slash_id]['ver'] == tokens[2]:
                    index_to_delete.append(data_index[ws_slash_id]['idx'])
                else:
                    raise ValueError('Reference: ' + str(ref) + ' at position ' + str(r) +
                                     ' of removal list was not found in palette.  Object exists, but ' +
                                     'version was not correct.')
            else:
                raise ValueError('Reference: ' + str(ref) + ' at position ' + str(r) +
                                 ' of removal list was not found in palette.')

        index_to_delete = set(index_to_delete)
        for i in sorted(index_to_delete, reverse=True):
            del palette['data'][i]

        self._extend_ref_paths_before_saving(palette)
        self._save_data_palette(palette)

        return {}


    def _build_palette_data_index(self, palette_data):
        data_index = {}
        for k in range(0, len(palette_data)):
            tokens = palette_data[k]['ref'].split('/')
            key = tokens[0] + '/' + tokens[1]
            value = {'ver': tokens[2], 'idx': k}
            data_index[key] = value
        return data_index
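
    # Example of the index built above (refs are illustrative):
    #     _build_palette_data_index([{'ref': '12/7/3'}, {'ref': '12/9/1'}])
    #     -> {'12/7': {'ver': '3', 'idx': 0}, '12/9': {'ver': '1', 'idx': 1}}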


    def _get_object_info(self, objects):
        return self.ws.get_object_info_new({'objects': objects})


    def _validate_objects_to_add(self, object_info_list):
        for info in object_info_list:
            # validate type, split and ignore the type version
            full_type_name = info[2].split('-')[0]
            if full_type_name in self.PROHIBITED_DATA_TYPES:
                raise ValueError('Object ' + str(info[1]) + ' (id=' + str(info[6]) + '/' +
                                 str(info[0]) + '/' + str(info[4]) + ') is a type (' + full_type_name +
                                 ') that cannot be added to a data palette.')


    def _attach_palette_data_info(self, palette, include_metadata=0):
        # TODO: make sure we get object info via reference chain
        if len(palette['data']) == 0:
            return palette
        palette_ref = self._get_root_data_palette_ref()
        info_input = [{'ref': palette_ref + ';' + obj['ref']} for obj in palette['data']]
        all_info = self.ws.get_object_info_new({
                                               'objects': info_input,
                                               'includeMetadata': include_metadata
                                               })

        for k in range(0, len(all_info)):
            palette['data'][k]['info'] = all_info[k]
            palette['data'][k]['dp_ref'] = palette_ref
            palette['data'][k]['dp_refs'] = [palette_ref]

        return palette


    def _extend_ref_paths_before_saving(self, palette):
        dp_ref = self._get_root_data_palette_ref()
        for data_ref in palette['data']:
            data_ref['ref'] = dp_ref + ';' + data_ref['ref']


    def _save_data_palette(self, palette):
        obj_info = self.ws.save_objects({
                                        'id': self.ws_info.id,
                                        'objects': [{
                                            'type': self.PALETTE_OBJ_WS_TYPE,
                                            'objid': self._get_root_data_palette_objid(),
                                            'data': palette,
                                            'provenance': self.PROVENANCE,
                                            'hidden': 1
                                        }]
                                        })[0]
        return obj_info


    def _get_data_palette(self):
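        """Fetch the palette object's data, creating an empty palette first if
        this workspace does not have one yet."""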
        palette_ref = self._get_root_data_palette_ref()
        if palette_ref is None:
            return self._create_data_palette()
        data = self.ws.get_objects2({
                                    'objects': [{'ref': palette_ref}]
                                    })
        return data['data'][0]['data']


    def _create_data_palette(self):
        # 1) save the data_palette object
        palette = {'data': []}
        new_palette_info = self.ws.save_objects({
                                                'id': self.ws_info.id,
                                                'objects': [{
                                                    'type': self.PALETTE_OBJ_WS_TYPE,
                                                    'name': self.DEFAULT_PALETTE_OBJ_NAME,
                                                    'data': palette,
                                                    'provenance': self.PROVENANCE,
                                                    'hidden': 1
                                                }]
                                                })[0]

        # 2) update ws metadata
        self._update_ws_palette_metadata(new_palette_info)
        return palette

    def create_from_existing_palette(self, existing_data_palette):
        # 1) make sure we can actually do it
        dp_target_ref = self._get_root_data_palette_ref()
        if dp_target_ref is not None:
            raise ValueError('Cannot copy data palette: a data palette already exists in the target workspace.')

        dp_source_ref = existing_data_palette._get_root_data_palette_ref()
        if dp_source_ref is None:
            # data palette did not exist, so we don't have to copy over anything
            return {}

        # 2) make the copy
        new_palette_info = self.ws.copy_object({
                                               'from': {'ref': dp_source_ref},
                                               'to': {'wsid': self.ws_info.id, 'name': self.DEFAULT_PALETTE_OBJ_NAME}
                                               })

        # 3) update ws metadata
        self._update_ws_palette_metadata(new_palette_info)
        return {}


    def set_palette_to_obj(self, new_data_palette_name_or_id):

        if new_data_palette_name_or_id is None:
            new_data_palette_name_or_id = self.DEFAULT_PALETTE_OBJ_NAME

        new_palette_ref = str(self.ws_info.id) + '/' + str(new_data_palette_name_or_id)
        new_palette_info = self._get_object_info([{'ref': new_palette_ref}])[0]

        if not str(new_palette_info[2]).startswith(self.PALETTE_OBJ_WS_TYPE):
            raise ValueError('Cannot set data palette for workspace to non-palette type.  Type of (' +
                             new_palette_ref + ') was: ' + str(new_palette_info[2]))

        self._update_ws_palette_metadata(new_palette_info)
        return {}


    def _get_root_data_palette_objid(self):
        ref = self._get_root_data_palette_ref()
        if ref is None:
            return None
        return ref.split('/')[1]

    def _get_root_data_palette_ref(self):
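        """Lazily resolve the palette ref ('wsid/objid') from the workspace
        metadata, caching it on the instance; None if no palette exists."""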
        if self.palette_ref is not None:
            return self.palette_ref
        if self.DATA_PALETTE_WS_METADATA_KEY not in self.ws_info.metadata:
            return None
        dp_id = self.ws_info.metadata[self.DATA_PALETTE_WS_METADATA_KEY]
        if not str(dp_id).isdigit():
            raise ValueError('WS metadata for ' + str(self.ws_info.id) +
                             ' was corrupted.  It is not set to an object ID.  It was: ' + str(dp_id))
        self.palette_ref = str(self.ws_info.id) + '/' + str(dp_id)
        return self.palette_ref

    def _update_ws_palette_metadata(self, palette_obj_info):
        self.ws.alter_workspace_metadata({
                                         'wsi': {
                                             'id': self.ws_info.id
                                         },
                                         'new': {
                                             self.DATA_PALETTE_WS_METADATA_KEY: str(palette_obj_info[0])
                                         }
                                         })
        # refresh local ws info
        self.ws_info = WorkspaceInfo(self.ws.get_workspace_info({'id': self.ws_info.id}))
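
Both the removal-list validation and _build_palette_data_index above lean on the same convention: an absolute workspace reference is exactly three numeric tokens, 'wsid/objid/ver', where 'wsid/objid' identifies the object and the last token pins the version. A minimal standalone sketch of that parsing rule (the helper name and return shape here are illustrative, not part of the class above):

def parse_absolute_ref(ref):
    # Split 'wsid/objid/ver' into its tokens; every token must be numeric
    tokens = str(ref).split('/')
    if len(tokens) != 3 or not all(t.isdigit() for t in tokens):
        raise ValueError('Invalid absolute reference: ' + str(ref) +
                         '. References must be full, absolute numerical WS refs.')
    # 'wsid/objid' identifies the object; the last token is its version
    return {'ws_slash_id': tokens[0] + '/' + tokens[1], 'ver': tokens[2]}

# parse_absolute_ref('124/15/1') -> {'ws_slash_id': '124/15', 'ver': '1'}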
Example #6
    def stage_input(self, input_ref, fasta_file_extension):
        '''
        Stage input based on an input data reference for CheckM

        input_ref can be a reference to an Assembly, AssemblySet, BinnedContigs,
        Genome, or GenomeSet

        This method creates a directory in the scratch area holding the set of
        Fasta files; each file name gets the fasta_file_extension tacked on.

            ex:

            staged_input = stage_input('124/15/1', 'fna')

            staged_input
            {"input_dir": '...'}
        '''
        # config
        # SERVICE_VER = 'dev'
        SERVICE_VER = 'release'

        # 1) generate a folder in scratch to hold the input
        suffix = str(int(time.time() * 1000))
        input_dir = os.path.join(self.scratch, 'bins_' + suffix)
        all_seq_fasta = os.path.join(
            self.scratch,
            'all_sequences_' + suffix + '.' + fasta_file_extension)
        if not os.path.exists(input_dir):
            os.makedirs(input_dir)

        # 2) based on type, download the files
        ws = Workspace(self.ws_url)
        input_info = ws.get_object_info3({'objects': [{
            'ref': input_ref
        }]})['infos'][0]
        # 0 obj_id objid - the numerical id of the object.
        # 1 obj_name name - the name of the object.
        # 2 type_string type - the type of the object.
        # 3 timestamp save_date - the save date of the object.
        # 4 obj_ver ver - the version of the object.
        # 5 username saved_by - the user that saved or copied the object.
        # 6 ws_id wsid - the workspace containing the object.
        # 7 ws_name workspace - the workspace containing the object.
        # 8 string chsum - the md5 checksum of the object.
        # 9 int size - the size of the object in bytes.
        # 10 usermeta meta - arbitrary user-supplied metadata about
        #     the object.
        [
            OBJID_I, NAME_I, TYPE_I, SAVE_DATE_I, VERSION_I, SAVED_BY_I,
            WSID_I, WORKSPACE_I, CHSUM_I, SIZE_I, META_I
        ] = range(11)  # object_info tuple
        obj_name = input_info[NAME_I]
        type_name = input_info[TYPE_I].split('-')[0]

        # auClient
        try:
            auClient = AssemblyUtil(self.callbackURL,
                                    token=self.ctx['token'],
                                    service_ver=SERVICE_VER)
        except Exception as e:
            raise ValueError(
                'Unable to instantiate auClient with callbackURL: ' +
                self.callbackURL + ' ERROR: ' + str(e))

        # setAPI_Client
        try:
            #setAPI_Client = SetAPI (url=self.callbackURL, token=self.ctx['token'])  # for SDK local.  local doesn't work for SetAPI
            setAPI_Client = SetAPI(
                url=self.serviceWizardURL,
                token=self.ctx['token'])  # for dynamic service
        except Exception as e:
            raise ValueError(
                'Unable to instantiate setAPI_Client with serviceWizardURL: ' +
                self.serviceWizardURL + ' ERROR: ' + str(e))

        # mguClient
        try:
            mguClient = MetagenomeUtils(self.callbackURL,
                                        token=self.ctx['token'],
                                        service_ver=SERVICE_VER)
        except Exception as e:
            raise ValueError(
                'Unable to instantiate mguClient with callbackURL: ' +
                self.callbackURL + ' ERROR: ' + str(e))

        # Standard Single Assembly
        #
        if type_name in [
                'KBaseGenomeAnnotations.Assembly', 'KBaseGenomes.ContigSet'
        ]:
            # create file data
            filename = os.path.join(input_dir,
                                    obj_name + '.' + fasta_file_extension)
            auClient.get_assembly_as_fasta({
                'ref': input_ref,
                'filename': filename
            })
            if not os.path.isfile(filename):
                raise ValueError(
                    'Error generating fasta file from an Assembly or ContigSet with AssemblyUtil'
                )
            # make sure fasta file isn't empty
            min_fasta_len = 1
            if not self.fasta_seq_len_at_least(filename, min_fasta_len):
                raise ValueError(
                    'Assembly or ContigSet is empty in filename: ' +
                    str(filename))

        # AssemblySet
        #
        elif type_name == 'KBaseSets.AssemblySet':

            # read assemblySet
            try:
                assemblySet_obj = setAPI_Client.get_assembly_set_v1({
                    'ref': input_ref,
                    'include_item_info': 1
                })
            except Exception as e:
                raise ValueError('Unable to get object from workspace: (' +
                                 input_ref + '): ' + str(e))
            assembly_refs = []
            assembly_names = []
            for assembly_item in assemblySet_obj['data']['items']:
                this_assembly_ref = assembly_item['ref']
                # assembly obj info
                try:
                    this_assembly_info = ws.get_object_info_new(
                        {'objects': [{
                            'ref': this_assembly_ref
                        }]})[0]
                    this_assembly_name = this_assembly_info[NAME_I]
                except Exception as e:
                    raise ValueError('Unable to get object from workspace: (' +
                                     this_assembly_ref + '): ' + str(e))
                assembly_refs.append(this_assembly_ref)
                assembly_names.append(this_assembly_name)

            # create file data (name for file is what's reported in results)
            for ass_i, assembly_ref in enumerate(assembly_refs):
                this_name = assembly_names[ass_i]
                filename = os.path.join(input_dir,
                                        this_name + '.' + fasta_file_extension)
                auClient.get_assembly_as_fasta({
                    'ref': assembly_ref,
                    'filename': filename
                })
                if not os.path.isfile(filename):
                    raise ValueError(
                        'Error generating fasta file from an Assembly or ContigSet with AssemblyUtil'
                    )
                # make sure fasta file isn't empty
                min_fasta_len = 1
                if not self.fasta_seq_len_at_least(filename, min_fasta_len):
                    raise ValueError(
                        'Assembly or ContigSet is empty in filename: ' +
                        str(filename))

        # Binned Contigs
        #
        elif type_name == 'KBaseMetagenomes.BinnedContigs':

            # download the bins as fasta and set the input folder name
            bin_file_dir = mguClient.binned_contigs_to_file({
                'input_ref': input_ref,
                'save_to_shock': 0
            })['bin_file_directory']
            os.rename(bin_file_dir, input_dir)
            # normalize the fasta file extensions, then make sure none is empty
            self.set_fasta_file_extensions(input_dir, fasta_file_extension)
            for (dirpath, dirnames, filenames) in os.walk(input_dir):
                for fasta_file in filenames:
                    fasta_path = os.path.join(input_dir, fasta_file)
                    min_fasta_len = 1
                    if not self.fasta_seq_len_at_least(fasta_path,
                                                       min_fasta_len):
                        raise ValueError(
                            'Binned Assembly is empty for fasta_path: ' +
                            str(fasta_path))
                break

        # Genome and GenomeSet
        #
        elif type_name in ['KBaseGenomes.Genome', 'KBaseSearch.GenomeSet']:
            genome_obj_names = []
            genome_sci_names = []
            genome_assembly_refs = []

            if type_name == 'KBaseGenomes.Genome':
                genomeSet_refs = [input_ref]
            else:  # get genomeSet_refs from GenomeSet object
                genomeSet_refs = []
                try:
                    genomeSet_object = ws.get_objects2(
                        {'objects': [{
                            'ref': input_ref
                        }]})['data'][0]['data']
                except Exception as e:
                    # to get the full stack trace: traceback.format_exc()
                    raise ValueError('Unable to fetch ' + str(input_ref) +
                                     ' object from workspace: ' + str(e))

                # iterate through genomeSet members
                for genome_id in genomeSet_object['elements']:
                    genome_ref = genomeSet_object['elements'][genome_id].get('ref')
                    if not genome_ref:
                        raise ValueError(
                            'genome_ref not found for genome_id: ' +
                            str(genome_id) + ' in genomeSet: ' +
                            str(input_ref))
                    genomeSet_refs.append(genome_ref)

            # genome obj data
            for i, this_input_ref in enumerate(genomeSet_refs):
                try:
                    objects = ws.get_objects2(
                        {'objects': [{
                            'ref': this_input_ref
                        }]})['data']
                    genome_obj = objects[0]['data']
                    genome_obj_info = objects[0]['info']
                    genome_obj_names.append(genome_obj_info[NAME_I])
                    genome_sci_names.append(genome_obj['scientific_name'])
                except Exception as e:
                    raise ValueError("Unable to fetch genome: " +
                                     this_input_ref + ": " + str(e))

                # Get genome_assembly_ref (assembly_ref takes precedence)
                if ('contigset_ref' not in genome_obj or genome_obj['contigset_ref'] is None) \
                   and ('assembly_ref' not in genome_obj or genome_obj['assembly_ref'] is None):
                    raise ValueError("Genome " + genome_obj_names[i] +
                                     " (ref:" + input_ref + ") " + genome_sci_names[i] +
                                     " MISSING BOTH contigset_ref AND assembly_ref.  " +
                                     "Cannot process.  Exiting.")
                elif genome_obj.get('assembly_ref') is not None:
                    print("Genome " + genome_obj_names[i] +
                          " (ref:" + input_ref + ") " + genome_sci_names[i] +
                          " USING assembly_ref: " + str(genome_obj['assembly_ref']))
                    genome_assembly_refs.append(genome_obj['assembly_ref'])
                else:
                    # the first check guarantees contigset_ref is present and set
                    print("Genome " + genome_obj_names[i] +
                          " (ref:" + input_ref + ") " + genome_sci_names[i] +
                          " USING contigset_ref: " + str(genome_obj['contigset_ref']))
                    genome_assembly_refs.append(genome_obj['contigset_ref'])

            # create file data (name for file is what's reported in results)
            for ass_i, assembly_ref in enumerate(genome_assembly_refs):
                this_name = genome_obj_names[ass_i]
                filename = os.path.join(input_dir,
                                        this_name + '.' + fasta_file_extension)
                auClient.get_assembly_as_fasta({
                    'ref': assembly_ref,
                    'filename': filename
                })
                if not os.path.isfile(filename):
                    raise ValueError(
                        'Error generating fasta file from an Assembly or ContigSet with AssemblyUtil'
                    )
                # make sure fasta file isn't empty
                min_fasta_len = 1
                if not self.fasta_seq_len_at_least(filename, min_fasta_len):
                    raise ValueError(
                        'Assembly or ContigSet is empty in filename: ' +
                        str(filename))

        # Unknown type slipped through
        #
        else:
            raise ValueError(
                'Cannot stage fasta file input directory from type: ' +
                type_name)

        # create summary fasta file with all bins
        self.cat_fasta_files(input_dir, fasta_file_extension, all_seq_fasta)

        return {
            'input_dir': input_dir,
            'folder_suffix': suffix,
            'all_seq_fasta': all_seq_fasta
        }
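
Every staged file above is guarded by self.fasta_seq_len_at_least, which this example never shows. A minimal sketch of what such a helper could look like, written as a standalone function (the name comes from the call sites; the implementation here is assumed, not taken from the actual module):

def fasta_seq_len_at_least(fasta_path, min_fasta_len=1):
    # Sum residue characters across all records, skipping '>' header lines,
    # and return as soon as the running total reaches the threshold.
    seq_len = 0
    with open(fasta_path) as handle:
        for line in handle:
            if line.startswith('>'):
                continue
            seq_len += len(line.strip())
            if seq_len >= min_fasta_len:
                return True
    return False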
Example #7
    def check_contig_cache(self, gref, token):
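        """Build (or reuse) a gzipped TSV index of contig ids, lengths, and
        feature counts for a Genome, keyed by the object's inner checksum."""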
        ws_client = Workspace(self.ws_url, token=token)
        info = ws_client.get_object_info_new({"objects": [{"ref": gref}]})[0]
        inner_chsum = info[8]
        index_file = os.path.join(self.genome_index_dir,
                                  inner_chsum + "_ctg.tsv.gz")
        if not os.path.isfile(index_file):
            t1 = time.time()

            genome = self.get_one_genome(
                {
                    "objects": [{
                        "ref": gref,
                        "included": ["/contigset_ref", "/assembly_ref"]
                    }]
                }, token)
            ctg_ref = None
            ctg_incl = None
            if "contigset_ref" in genome:
                if self.debug:
                    print("    Loading contigs from ContigSet...")
                ctg_ref = genome["contigset_ref"]
                ctg_incl = ["/contigs/[*]/id", "/contigs/[*]/length"]
            elif "assembly_ref" in genome:
                if self.debug:
                    print("    Loading contigs from Assembly...")
                ctg_ref = genome["assembly_ref"]
                ctg_incl = ["/contigs/*/length"]
            # Genome objects without contigs are now allowed; just skip them rather than erroring.
            contigs = {}
            if ctg_ref:
                assembly = ws_client.get_objects2({
                    "objects": [{
                        "included": ctg_incl,
                        "ref": gref,
                        "obj_ref_path": [ctg_ref]
                    }]
                })["data"][0]["data"]
                if "contigset_ref" in genome:
                    for ctg in assembly["contigs"]:
                        contigs[ctg["id"]] = [ctg["length"], 0]
                else:
                    for ctg_id in assembly["contigs"]:
                        contigs[ctg_id] = [
                            assembly["contigs"][ctg_id]["length"], 0
                        ]
                if self.debug:
                    print(("    (time=" + str(time.time() - t1) + ")"))
            inner_chsum = self.check_feature_cache(gref, token)
            # Read the features without sorting and count them per contig_id
            index_iter = self.get_feature_sorted_iterator(inner_chsum, None)
            if self.debug:
                print("    Grouping features...")
            t1 = time.time()
            with index_iter:
                for line in index_iter:
                    items = line.rstrip('\n').split('\t')
                    contig_id = items[3]
                    if not contig_id:
                        continue
                    if contig_id not in contigs:
                        raise ValueError("Contig id=" + contig_id +
                                         " is not found")
                    contigs[contig_id][1] += 1
            self.save_contig_tsv(contigs, inner_chsum)
            if self.debug:
                print(("    (time=" + str(time.time() - t1) + ")"))
        return inner_chsum
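
check_contig_cache ends by handing the per-contig [length, feature_count] map to self.save_contig_tsv, which is not shown. Given the '_ctg.tsv.gz' naming used for index_file above, a plausible writer is just a gzipped tab-separated dump; a sketch under that assumption (the function signature is hypothetical):

import gzip
import os

def save_contig_tsv(contigs, inner_chsum, genome_index_dir):
    # contigs maps contig_id -> [length, feature_count], as built above
    index_file = os.path.join(genome_index_dir, inner_chsum + "_ctg.tsv.gz")
    with gzip.open(index_file, "wt") as out:
        for contig_id, (length, feature_count) in contigs.items():
            out.write(contig_id + "\t" + str(length) + "\t" +
                      str(feature_count) + "\n")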