def workspace_to_file(ref, workspace='narrative', token=None):
    """Fetch one Workspace object and reshape it into the mock-file schema.

    The result is the JSON-style record read by the mongomock module.

    Args:
        ref (str): Workspace object reference, e.g. '1019/4/1'.
        workspace (str): Either a full ``https://`` URL of the Workspace
            service, or a short name (such as 'narrative' or 'ci') that is
            substituted into ``ws_url_template``.
        token (str): KBase auth token. When None, the value of the
            KB_AUTH_TOKEN environment variable is used instead.

    Returns:
        (dict) Record with the keys 'ref', 'type', 'name', 'links',
        'data' and 'metadata'.

    Raises:
        ValueError: If `token` is None and KB_AUTH_TOKEN is unset or empty.
    """
    from doekbase.workspace.client import Workspace

    # A full URL is used verbatim; anything else is treated as a short name.
    is_full_url = re.match(r'https://.*', workspace)
    url = workspace if is_full_url else ws_url_template.format(workspace)

    if token is None:
        env_token = os.environ.get('KB_AUTH_TOKEN', '')
        if not env_token:
            raise ValueError('No `token` given and environment does not '
                             'contain value for KB_AUTH_TOKEN')
        token = env_token

    client = Workspace(url, token=token)
    fetched = client.get_objects([{'ref': ref}])
    record = fetched[0]
    info = record['info']

    # Positions used from the object-info tuple: 0 = object id, 1 = object
    # name, 2 = type string, 4 = version, 6 = workspace id,
    # 7 = workspace name, 10 = metadata.
    canonical_ref = "{0}/{1}/{2}".format(info[6], info[0], info[4])
    canonical_name = "{0}/{1}".format(info[7], info[1])

    # Assemble the record in the mock schema.
    result = {
        'ref': canonical_ref,
        'type': info[2],
        'name': canonical_name,
        'links': record['refs'],
        'data': record['data'],
        'metadata': info[10],
    }
    _log.debug(
        'workspace_to_file: returning record for: {}'.format(canonical_ref))
    return result
def run(service_host, kbase_host):
    """List GenomeAnnotation objects per workspace and fetch their data.

    For every name in `workspace_names`, lists the objects of type
    ``KBaseGenomeAnnotations.GenomeAnnotation`` and, for each one, requests
    feature ids, features, contig ids and contigs through the
    GenomeAnnotation and Assembly service clients. Progress is printed to
    stdout, prefixed with the current process id.

    `token` and `workspace_names` are not parameters; they are read from
    the enclosing scope, which is not part of this block.

    Args:
        service_host: Passed to `get_genome_annotation_url` and
            `get_assembly_url` to build the service URLs.
        kbase_host: Passed to `get_workspace_url` to build the Workspace URL.
    """
    pid = os.getpid()
    ws = Workspace(url=get_workspace_url(kbase_host), token=token)
    for name in workspace_names:
        # Keep listing until the call succeeds. Any exception (not only a
        # timeout, despite the message) triggers an immediate retry with no
        # delay and no retry limit.
        while 1:
            print('[{:d}] List objects'.format(pid))
            try:
                annotations = ws.list_objects({
                    "workspaces": [name],
                    "type": "KBaseGenomeAnnotations.GenomeAnnotation"
                })
                break
            except Exception as err:
                print('Retry on timeout: {}'.format(str(err)))
        print('[{:d}] Got {:d} objects'.format(pid, len(annotations)))
        for obj_num, obj in enumerate(annotations):
            # Build a "<workspace name>/<object name>" reference from the
            # object-info tuple (index 7 and index 1).
            ref = obj[7] + "/" + obj[1]
            print('[{:d}] Fetch {:d}/{:d}: {}'.format(pid, obj_num + 1,
                                                      len(annotations), ref))
            ga = GenomeAnnotationClientAPI(
                get_genome_annotation_url(service_host), token, ref)
            # Taxon lookup is currently disabled:
            #taxon = TaxonClientAPI(services["taxon_service_url"],token,ga.get_taxon())
            assembly = AssemblyClientAPI(get_assembly_url(service_host), token,
                                         ga.get_assembly())
            # NOTE(review): no `break` is visible in this loop, so as shown it
            # repeats the four fetches indefinitely even when they succeed.
            # Confirm whether that is intentional (continuous load) or whether
            # a `break` after the last fetch is missing.
            while 1:
                try:
                    # Results are assigned but not used in the visible code.
                    fids = ga.get_feature_ids()
                    fdata = ga.get_features()
                    cids = assembly.get_contig_ids()
                    contigs = assembly.get_contigs()
                except doekbase.data_api.exceptions.ServiceError as err:
                    # Only service errors are retried, after a short pause.
                    print('[{:d}] Error: {}'.format(pid, err))
                    time.sleep(0.5)
                    print('[{:d}] Retrying'.format(pid))
def __init__(self, services=None, token=None, ref=None):
    """Create new object.

    Validates the arguments, connects to a Workspace (a remote service or
    a local file-based one), looks up the referenced object and caches its
    basic information on the instance.

    Args:
        services (dict): Service configuration dictionary.
            Required keys:
                * workspace_service_url: URL for Workspace, such as
                  `https://ci.kbase.us/services/ws/`. A value without
                  '://' is handed to `_init_ws_from_files` instead of
                  being contacted as a remote service.
        token (str): Auth token for the remote Workspace. If None or
            blank, `get_token()` supplies it. Not used for a local
            file-based workspace.
        ref (str): Object reference, which can be the name of the object
            (although this is not unique), or a numeric identifier in the
            format `A/B[/C]` where A is the number of the workspace, B is
            the number identifying the object, and C is the "version"
            number of the object.

    Raises:
        TypeError: If `services` is not a dict, or `ref` is missing, is
            not a string, or does not match REF_PATTERN.
        KeyError: If `services` lacks the "workspace_service_url" key.
        ValueError: If the Workspace returns no info for `ref`.
    """
    # isinstance() also rejects None and, unlike an exact type comparison,
    # accepts dict subclasses.
    if not isinstance(services, dict):
        raise TypeError(
            "You must provide a service configuration dictionary! Found {0}"
            .format(type(services)))
    if "workspace_service_url" not in services:
        raise KeyError("Expecting workspace_service_url key!")

    if ref is None:
        raise TypeError("Missing object reference!")
    # type(u"") is `unicode` on Python 2 and `str` on Python 3, so both
    # byte and text strings are accepted where the two differ.
    if not isinstance(ref, (str, type(u""))):
        raise TypeError("Invalid reference given, expected string! "
                        "Found {0}".format(type(ref)))
    if re.match(REF_PATTERN, ref) is None:
        raise TypeError(
            "Invalid workspace reference string! Found {0}".format(ref))

    self.services = services
    self.ref = ref
    self._token = None

    ws_url = services["workspace_service_url"]
    local_workspace = False
    if '://' in ws_url:
        # Assume a real Workspace server.
        if token is None or not token.strip():
            self._token = get_token()
        else:
            self._token = token
        _log.debug('Connect to Workspace service at {}'.format(ws_url))
        self.ws_client = Workspace(ws_url, token=self._token)
    else:
        _log.debug('Load from Workspace file at {}'.format(ws_url))
        local_workspace = True
        self.ws_client = self._init_ws_from_files(ws_url)

    info_values = self.ws_client.get_object_info_new({
        "objects": [{"ref": self.ref}],
        "includeMetadata": 0,
        "ignoreErrors": 0
    })
    if not info_values:
        raise ValueError("Cannot find object: {}".format(self.ref))

    # Unpack the positional object-info tuple into named fields.
    oi = info_values[0]
    self._info = {
        "object_id": oi[0],
        "object_name": oi[1],
        "object_reference": "{0}/{1}".format(oi[6], oi[0]),
        "object_reference_versioned": "{0}/{1}/{2}".format(
            oi[6], oi[0], oi[4]),
        "type_string": oi[2],
        "save_date": oi[3],
        "version": oi[4],
        "saved_by": oi[5],
        "workspace_id": oi[6],
        "workspace_name": oi[7],
        "object_checksum": oi[8],
        "object_size": oi[9],
        "object_metadata": oi[10]
    }
    self._id = self._info["object_id"]
    self._name = self._info["object_name"]
    # A single type string is submitted, so the first (only) value of the
    # returned mapping is taken. list() keeps this working when .values()
    # returns a view rather than a list.
    md5_types = self.ws_client.translate_to_MD5_types(
        [self._info["type_string"]])
    self._typestring = list(md5_types.values())[0]
    self._version = str(self._info["version"])
    self._schema = None
    self._history = None
    self._provenance = None
    self._data = None

    # Init stats
    self._stats = g_stats

    # Init the caching object. Pass in whether the object is publicly
    # available (which can determine whether it is cached).
    if local_workspace:
        global_read = True  # Local file-workspace objects are public
    else:
        wsinfo = self.ws_client.get_workspace_info(
            {'id': self._info['workspace_id']})
        wsinfo_obj = WorkspaceInfo(*wsinfo)
        global_read = (wsinfo_obj.globalread == 'r')
    self._cache = cache.ObjectCache(
        self._info["object_reference_versioned"],
        is_public=global_read)
from doekbase.handle.Client import AbstractHandle as handleClient

# Service endpoints used by this script. The kbase.us set is active; the
# ci.kbase.us and next.kbase.us sets are kept as commented-out alternatives.
services = {
    # "workspace_service_url": "https://ci.kbase.us/services/ws/",
    # "shock_service_url": "https://ci.kbase.us/services/shock-api/",
    # "handle_service_url": "https://ci.kbase.us/services/handle_service/"
    # "workspace_service_url": "https://next.kbase.us/services/ws/",
    # "shock_service_url": "https://next.kbase.us/services/shock-api/",
    # "handle_service_url": "https://next.kbase.us/services/handle_service/"
    "workspace_service_url": "https://kbase.us/services/ws/",
    "shock_service_url": "https://kbase.us/services/shock-api/",
    "handle_service_url": "https://kbase.us/services/handle_service/"
}

# Raises KeyError if KB_AUTH_TOKEN is not set in the environment.
token = os.environ["KB_AUTH_TOKEN"]

# Clients for the Workspace and Handle services, sharing the same token.
ws = Workspace(url=services["workspace_service_url"], token=token)
hc = handleClient(url=services["handle_service_url"], token=token)

# Name of the workspace whose Assembly objects are listed below.
fix_workspace = "ReferenceEnsemblPlantGenomeAnnotations"

# Paging state for list_objects: `skip` and `limit` are sent with each
# request. `count` is initialised here but not used in the visible code.
count = 0
limit = 10000
skip = 0
done = False

# NOTE(review): only the list_objects call is visible in this loop. Nothing
# shown here updates `done`, `skip` or `count`, so the rest of the loop body
# is presumably outside this excerpt - confirm before changing it.
while not done:
    results = ws.list_objects({
        "workspaces": [fix_workspace],
        "type": "KBaseGenomeAnnotations.Assembly",
        "skip": skip,
        "limit": limit
    })
def __init__(self, url, shock_url, token=None):
    """Set up the Workspace client used by this connection.

    Args:
        url: Workspace service URL handed to the `Workspace` client.
        shock_url: Accepted for the caller's benefit; the visible body
            does not use it.
        token: Auth token. When it is None or otherwise falsy, the value
            returned by `get_token()` is used instead.
    """
    if token:
        self.token = token
    else:
        self.token = get_token()
    workspace_client = Workspace(url, token=self.token)
    self.ws_client = workspace_client
    super(WorkspaceConnection, self).__init__()