def __init__(self, config):
     self.ws_url = config["workspace-url"]
     self.callback_url = config['SDK_CALLBACK_URL']
     self.token = config['KB_AUTH_TOKEN']
     self.shock_url = config['shock-url']
     self.ws = Workspace(self.ws_url, token=self.token)
     self.dfu = DataFileUtil(self.callback_url)
     self.scratch = config['scratch']
Example #2
    def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url, provenance):
        self.scratch_dir = scratch_dir
        self.workspace_url = workspace_url
        self.callback_url = callback_url
        self.srv_wiz_url = srv_wiz_url
        self.provenance = provenance

        # from the provenance, extract out the version to run by exact hash if possible
        self.my_version = 'release'
        if len(provenance) > 0:
            if 'subactions' in provenance[0]:
                self.my_version = self.get_version_from_subactions('kb_Bowtie2', provenance[0]['subactions'])
        print('Running kb_Bowtie2 version = ' + self.my_version)

        self.ws = Workspace(self.workspace_url)
        self.bowtie2 = Bowtie2Runner(self.scratch_dir)
        self.parallel_runner = KBParallel(self.callback_url)
        self.qualimap = kb_QualiMap(self.callback_url)
Example #3
    def get_contig_info(self, ctx, params):
        """
        :param params: instance of type "GetContigInfoParams" -> structure:
           parameter "ref" of String, parameter "contig_id" of String
        :returns: instance of type "GetContigInfoResult" -> structure:
           parameter "contig" of type "contig" (contig_id - identifier of
           contig feature_count - number of features associated with contig
           length - the dna sequence length of the contig) -> structure:
           parameter "contig_id" of String, parameter "feature_count" of
           Long, parameter "length" of Long
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN get_contig_info
        if 'ref' not in params:
            raise RuntimeError("'ref' argument required for get_contig_info")
        if 'contig_id' not in params:
            raise RuntimeError(
                "'contig_id' argument required for get_contig_info")
        contig_id = params['contig_id']
        ws = Workspace(self.config['workspace-url'], token=ctx['token'])
        ama_utils = AMAUtils(ws)
        params['included_fields'] = ['contig_ids', 'contig_lengths']
        data = ama_utils.get_annotated_metagenome_assembly(
            params)['genomes'][0]['data']
        contig_ids = data['contig_ids']
        contig_lengths = data['contig_lengths']
        length = None
        for i, c in enumerate(contig_ids):
            if c == contig_id:
                length = contig_lengths[i]
                break
        if length is None:
            raise ValueError(f"contig_id '{contig_id}' was not found in the "
                             "referenced metagenome assembly")
        if self.msu.status_good:
            feature_count = self.msu.search_contig_feature_count(
                ctx["token"], params.get("ref"), contig_id)
            result = {
                'contig': {
                    "contig_id": contig_id,
                    "length": length,
                    "feature_count": feature_count
                }
            }
        else:
            result = {
                'contig': {
                    "contig_id": contig_id,
                    "length": length,
                    "feature_count": 0
                }
            }
        #END get_contig_info

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method get_contig_info return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
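A minimal call sketch for get_contig_info, assuming a deployed implementation instance (impl) and a context dict (ctx) carrying a valid token; the reference and contig id below are made up.

params = {'ref': '12345/6/7', 'contig_id': 'contig_1'}
result = impl.get_contig_info(ctx, params)[0]
print(result['contig']['contig_id'],
      result['contig']['length'],
      result['contig']['feature_count'])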
Example #4
    def __init__(self, config):
        self.ws_url = config["workspace-url"]
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.shock_url = config['shock-url']
        self.df = DataFileUtil(self.callback_url)
        self.gsu = GenomeSearchUtil(self.callback_url)
        self.ws = Workspace(self.ws_url, token=self.token)
        self.scratch = config['scratch']
    def setUpClass(cls):
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        cls.callbackURL = environ.get('SDK_CALLBACK_URL')
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('ExpressionUtils'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            cls.token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'ExpressionUtils',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        cls.shockURL = cls.cfg['shock-url']
        cls.wsURL = cls.cfg['workspace-url']
        cls.service_wizard_url = cls.cfg['srv-wiz-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.ws = Workspace(cls.wsURL, token=cls.token)
        cls.hs = HandleService(url=cls.cfg['handle-service-url'],
                               token=cls.token)
        # create workspace
        wssuffix = int(time.time() * 1000)
        wsname = "test_expression_" + str(wssuffix)
        cls.wsinfo = cls.wsClient.create_workspace({'workspace': wsname})
        print('created workspace ' + cls.getWsName())

        cls.serviceImpl = ExpressionUtils(cls.cfg)
        cls.readUtils = ReadsUtils(cls.callbackURL)
        cls.dfu = DataFileUtil(cls.callbackURL, service_ver='dev')
        cls.dfu.ws_name_to_id(wsname)
        cls.assemblyUtil = AssemblyUtil(cls.callbackURL)
        cls.gfu = GenomeFileUtil(cls.callbackURL)
        cls.gaAPI = GenomeAnnotationAPI(cls.service_wizard_url)
        cls.rau = ReadsAlignmentUtils(cls.callbackURL)
        cls.scratch = cls.cfg['scratch']

        cls.staged = {}
        cls.nodes_to_delete = []
        cls.handles_to_delete = []
        cls.setupTestData()
Example #6
    def list_data(self, ctx, params):
        '''
        List DataPalette entries for the given workspaces (names or numeric ids).
        '''
        token = self._extract_token(ctx)

        if 'workspaces' not in params:
            raise ValueError(
                'missing required field "workspaces" in parameters to list_data'
            )
        if not isinstance(params['workspaces'], list):
            raise ValueError('"workspaces" field must be a list')
        workspaces = params['workspaces']
        include_metadata = params.get('include_metadata', 0)

        ws = Workspace(self.ws_url, token=token)
        ws_info_list = []
        if len(workspaces) == 1:
            workspace = workspaces[0]
            list_params = {}
            if str(workspace).isdigit():
                list_params['id'] = int(workspace)
            else:
                list_params['workspace'] = str(workspace)
            ws_info_list.append(ws.get_workspace_info(list_params))
        else:
            ws_map = {key: True for key in workspaces}
            for ws_info in ws.list_workspace_info({'perm': 'r'}):
                if ws_info[1] in ws_map or str(ws_info[0]) in ws_map:
                    ws_info_list.append(ws_info)

        data = []
        dp_list_filter = {'include_metadata': include_metadata}
        data_palette_refs = {}
        for ws_info in ws_info_list:
            dp = DataPalette(None, ws_info=ws_info, ws=ws)
            data = data + dp.list(dp_list_filter)
            dp_ref = dp._get_root_data_palette_ref()
            if dp_ref:
                data_palette_refs[str(ws_info[0])] = dp_ref

        data = self._remove_duplicate_data(data)

        return {'data': data, 'data_palette_refs': data_palette_refs}
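A usage sketch for list_data, assuming an implementation instance (impl) and a token-bearing ctx; per the branch above, workspace entries may be names or numeric ids.

params = {'workspaces': ['my_test_workspace', '4567'], 'include_metadata': 0}
listing = impl.list_data(ctx, params)
print(len(listing['data']), 'palette entries')
print(listing['data_palette_refs'])  # maps workspace id -> root data palette ref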
Example #7
    def check_object_cache(self, ref, search_object, info_included,
                           index_dir, object_suffix, debug):
        ws = Workspace(self.ws_url, token=self.token)
        info = ws.get_object_info3({"objects": [{"ref": ref}]})['infos'][0]
        inner_chsum = info[8]
        index_file = os.path.join(index_dir,
                                  inner_chsum + object_suffix + ".tsv.gz")
        if not os.path.isfile(index_file):
            if debug:
                print("    Loading WS object...")
                t1 = time.time()

            included = self.build_info_included(search_object, info_included)
            object = ws.get_objects2({'objects': [{'ref': ref,
                                                   'included': included}]})['data'][0]['data']
            self.save_object_tsv(object[search_object], inner_chsum, info_included,
                                 index_dir, object_suffix)
            if debug:
                print("    (time=" + str(time.time() - t1) + ")")
        return inner_chsum
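A call sketch for the cache helper above; the cache key is the object's inner checksum (info[8]), so the TSV index is rebuilt only when the object changes. The instance name, reference, field names, and paths are assumptions.

chsum = searcher.check_object_cache(ref='123/4/5',
                                    search_object='features',
                                    info_included=['id', 'location'],
                                    index_dir='/kb/module/work/tmp/indexes',
                                    object_suffix='_ftr',
                                    debug=True)
index_file = chsum + '_ftr.tsv.gz'  # name of the TSV index written under index_dir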
 def __init__(self, config):
     #BEGIN_CONSTRUCTOR
     self.config = config
     self.scratch = config['scratch']
     self.callback_url = os.environ['SDK_CALLBACK_URL']
     self.ws_url = config['workspace-url']
     self.ws_client = Workspace(self.ws_url)
     self.dfu = DataFileUtil(self.callback_url)
     self.demu = GenDiffExprMatrix(config)
     #END_CONSTRUCTOR
     pass
Example #9
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self.workspaceURL = config['workspace-url']
        self.ws = Workspace(self.workspaceURL)
        self.shockURL = config['shock-url']
        self.logger = logging.getLogger()
        log_handler = logging.StreamHandler()
        log_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
        self.logger.addHandler(log_handler)

        #END_CONSTRUCTOR
        pass
Example #10
    def __init__(self, config):
        self.ws_url = config.workspaceURL
        self.handle_url = config.handleURL
        self.shock_url = config.shockURL
        self.sw_url = config.srvWizURL
        self.token = config.token
        self.auth_service_url = config.authServiceUrl
        self.callback_url = config.callbackURL

        self.ws = Workspace(self.ws_url, token=self.token)
        self.auth_client = _KBaseAuth(self.auth_service_url)
        self.dfu = DataFileUtil(self.callback_url)
Example #11
    def _put_cached_index(self, assembly_info, index_files_basename, output_dir, ws_for_cache):

        if not ws_for_cache:
            print('WARNING: bowtie2 index cannot be cached because "ws_for_cache" field not set')
            return False

        try:
            dfu = DataFileUtil(self.callback_url)
            result = dfu.file_to_shock({'file_path': output_dir,
                                        'make_handle': 1,
                                        'pack': 'targz'})

            bowtie2_index = {'handle': result['handle'], 'size': result['size'],
                             'assembly_ref': assembly_info['ref'],
                             'index_files_basename': index_files_basename}

            ws = Workspace(self.ws_url)
            save_params = {'objects': [{'hidden': 1,
                                        'provenance': self.provenance,
                                        'name': os.path.basename(output_dir),
                                        'data': bowtie2_index,
                                        'type': 'KBaseRNASeq.Bowtie2IndexV2'
                                        }]
                           }
            if ws_for_cache.strip().isdigit():
                save_params['id'] = int(ws_for_cache)
            else:
                save_params['workspace'] = ws_for_cache.strip()
            save_result = ws.save_objects(save_params)
            print('Bowtie2IndexV2 cached to: ')
            pprint(save_result[0])
            return True

        except Exception:
            # if we fail in saving the cached object, don't worry
            print('WARNING: exception encountered when trying to cache the index files:')
            print(traceback.format_exc())
            print('END WARNING: exception encountered when trying to cache the index files')

        return False
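A sketch of how the caching helper above might be driven; caching is best-effort, so callers only branch on the boolean result. The instance name, reference, paths, and workspace name are illustrative.

cached = indexer._put_cached_index(assembly_info={'ref': '7/8/9'},
                                   index_files_basename='bt2_index',
                                   output_dir='/kb/module/work/tmp/bt2_index',
                                   ws_for_cache='my_workspace')
if not cached:
    print('index was not cached; it will be rebuilt on the next run')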
Example #12
 def __init__(self, config, logger=None):
     self.config = config
     self.logger = logger
     self.callback_url = os.environ['SDK_CALLBACK_URL']
     self.scratch = os.path.join(config['scratch'],
                                 'DEM_' + str(uuid.uuid4()))
     self.ws_url = config['workspace-url']
     self.ws_client = Workspace(self.ws_url)
     self.fv = KBaseFeatureValues(self.callback_url)
     self.dfu = DataFileUtil(self.callback_url)
     self.setAPI = SetAPI(self.callback_url)
     self.gsu = GenomeSearchUtil(self.callback_url)
     self._mkdir_p(self.scratch)
Example #13
    def __init__(self, ws_name_or_id, ws_url=None, token=None, ws_info=None, ws=None):
        if ws:
            self.ws = ws
        else:
            if ws_url is None:
                raise ValueError('ws_url was not defined')
            if token is None:
                print('DataPalette warning: token was not set')
            self.ws = Workspace(ws_url, token=token)

        if ws_info:
            if ws_name_or_id:
                raise ValueError("Either ws_name_or_id or ws_info should be set")
            self.ws_info = WorkspaceInfo(ws_info)
        else:
            if str(ws_name_or_id).isdigit():
                self.ws_info = WorkspaceInfo(self.ws.get_workspace_info({'id': int(ws_name_or_id)}))
            else:
                self.ws_info = WorkspaceInfo(self.ws.get_workspace_info({
                                                                    'workspace': str(ws_name_or_id)
                                                                    }))

        self.palette_ref = None
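Two ways to construct a DataPalette per the constructor above, sketched with illustrative values; token, ws_info_tuple, and existing_ws_client are placeholders.

# build a fresh Workspace client from a URL and token
dp_by_name = DataPalette('my_workspace',
                         ws_url='https://kbase.us/services/ws', token=token)
# reuse an existing client and a workspace info tuple that were already fetched
dp_reused = DataPalette(None, ws_info=ws_info_tuple, ws=existing_ws_client)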
Example #14
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('kb_deseq'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'kb_deseq',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.ws = Workspace(cls.wsURL, token=token)
        cls.serviceImpl = kb_deseq(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']

        cls.gfu = GenomeFileUtil(cls.callback_url)
        cls.dfu = DataFileUtil(cls.callback_url, service_ver='dev')
        cls.ru = ReadsUtils(cls.callback_url)
        cls.rau = ReadsAlignmentUtils(cls.callback_url, service_ver='dev')
        cls.stringtie = kb_stringtie(cls.callback_url, service_ver='dev')
        cls.eu = ExpressionUtils(cls.callback_url, service_ver='dev')

        cls.deseq_runner = DESeqUtil(cls.cfg)

        suffix = int(time.time() * 1000)
        cls.wsName = "test_kb_stringtie_" + str(suffix)
        cls.wsClient.create_workspace({'workspace': cls.wsName})
        cls.dfu.ws_name_to_id(cls.wsName)

        cls.prepare_data()
Example #15
 def __init__(self, config):
     self.ws_url = config["workspace-url"]
     self.callback_url = config['SDK_CALLBACK_URL']
     self.token = config['KB_AUTH_TOKEN']
     self.shock_url = config['shock-url']
     self.scratch = config['scratch']
     self.srv_wiz_url = config['srv-wiz-url']
     self.ws = Workspace(self.ws_url, token=self.token)
     self.bt = kb_Bowtie2(self.callback_url)
     self.rau = ReadsAlignmentUtils(self.callback_url)
     self.qualimap = kb_QualiMap(self.callback_url)
     self.ru = ReadsUtils(self.callback_url)
     self.dfu = DataFileUtil(self.callback_url)
     self.set_client = SetAPI(self.srv_wiz_url)
    def test_build_hisat2_index_from_assembly_ok(self):
        manager = Hisat2IndexManager(self.wsURL, self.callback_url, self.scratch)
        ws = Workspace(self.wsURL)
        genome_obj_info = ws.get_objects2({
            'objects': [{'ref': self.genome_ref}],
            'no_data': 1
        })
        # get the list of genome refs from the returned info.
        # if there are no refs (or something funky with the return), this will be an empty list.
        # this WILL fail if data is an empty list. But it shouldn't be, and we know because
        # we have a real genome reference, or get_objects2 would fail.
        genome_obj_refs = genome_obj_info.get('data', [{}])[0].get('refs', [])

        # see which of those are of an appropriate type (ContigSet or Assembly), if any.
        assembly_ref = list()
        ref_params = [{'ref': x} for x in genome_obj_refs]
        ref_info = ws.get_object_info3({'objects': ref_params})
        for idx, info in enumerate(ref_info.get('infos')):
            if "KBaseGenomeAnnotations.Assembly" in info[2] or "KBaseGenomes.ContigSet" in info[2]:
                assembly_ref.append(";".join(ref_info.get('paths')[idx]))
        assembly_ref = assembly_ref[0]
        idx_prefix = manager.get_hisat2_index(assembly_ref)
        self.assertIn("kb_hisat2_idx", idx_prefix)
Example #17
def fetch_fasta_from_genome(genome_ref, ws_url, callback_url):
    """
    Returns an assembly or contigset as FASTA.
    """
    if not check_ref_type(genome_ref, ['KBaseGenomes.Genome'], ws_url):
        raise ValueError(
            "The given genome_ref {} is not a KBaseGenomes.Genome type!".format(genome_ref))
    # test if genome references an assembly type
    # do get_objects2 without data. get list of refs
    ws = Workspace(ws_url)
    genome_obj_info = ws.get_objects2({
        'objects': [{
            'ref': genome_ref
        }],
        'no_data': 1
    })
    # get the list of genome refs from the returned info.
    # if there are no refs (or something funky with the return), this will be an empty list.
    # this WILL fail if data is an empty list. But it shouldn't be, and we know because
    # we have a real genome reference, or get_objects2 would fail.
    genome_obj_refs = genome_obj_info.get('data', [{}])[0].get('refs', [])

    # see which of those are of an appropriate type (ContigSet or Assembly), if any.
    assembly_ref = list()
    ref_params = [{'ref': x} for x in genome_obj_refs]
    ref_info = ws.get_object_info3({'objects': ref_params})
    for idx, info in enumerate(ref_info.get('infos')):
        if "KBaseGenomeAnnotations.Assembly" in info[
                2] or "KBaseGenomes.ContigSet" in info[2]:
            assembly_ref.append(";".join(ref_info.get('paths')[idx]))

    if len(assembly_ref) == 1:
        return fetch_fasta_from_assembly(assembly_ref[0], ws_url, callback_url)
    else:
        raise ValueError(
            "Expected exactly one assembly associated with the given genome ref {} "
            "but found {}. Unable to continue.".format(genome_ref, len(assembly_ref)))
Example #18
 def setUpClass(cls):
     token = os.environ.get('KB_AUTH_TOKEN', None)
     config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('TreeUtils'):
         cls.cfg[nameval[0]] = nameval[1]
     # Getting username from Auth profile for token
     authServiceUrl = cls.cfg['auth-service-url']
     auth_client = _KBaseAuth(authServiceUrl)
     user_id = auth_client.get_user(token)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token':
         token,
         'user_id':
         user_id,
         'provenance': [{
             'service': 'TreeUtils',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated':
         1
     })
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = Workspace(cls.wsURL)
     cls.serviceImpl = TreeUtils(cls.cfg)
     cls.scratch = cls.cfg['scratch']
     cls.callback_url = os.environ['SDK_CALLBACK_URL']
     cls.dfu = DataFileUtil(cls.callback_url)
     suffix = int(time.time() * 1000)
     cls.wsName = "test_CompoundSetUtils_" + str(suffix)
     ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
     cls.wsId = ret[0]
     cls.tree_obj = json.load(open('data/tree.json'))
     info = cls.dfu.save_objects({
         "id":
         cls.wsId,
         "objects": [{
             "type": "KBaseTrees.Tree",
             "data": cls.tree_obj,
             "name": "test_tree"
         }]
     })[0]
     cls.tree_ref = "%s/%s/%s" % (info[6], info[0], info[4])
Example #19
 def setUpClass(cls):
     cls.token = environ.get('KB_AUTH_TOKEN', None)
     config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('kb_quast'):
         cls.cfg[nameval[0]] = nameval[1]
     authServiceUrl = cls.cfg.get(
         'auth-service-url',
         "https://kbase.us/services/authorization/Sessions/Login")
     auth_client = _KBaseAuth(authServiceUrl)
     user_id = auth_client.get_user(cls.token)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token':
         cls.token,
         'user_id':
         user_id,
         'provenance': [{
             'service': 'kb_quast',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated':
         1
     })
     cls.shockURL = cls.cfg['shock-url']
     cls.ws = Workspace(cls.cfg['workspace-url'], token=cls.token)
     cls.hs = HandleService(url=cls.cfg['handle-service-url'],
                            token=cls.token)
     cls.au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
     cls.impl = kb_quast(cls.cfg)
     cls.scratch = cls.cfg['scratch']
     shutil.rmtree(cls.scratch)
     os.mkdir(cls.scratch)
     suffix = int(time.time() * 1000)
     wsName = "test_ReadsUtils_" + str(suffix)
     cls.ws_info = cls.ws.create_workspace({'workspace': wsName})
     cls.dfu = DataFileUtil(os.environ['SDK_CALLBACK_URL'])
     cls.staged = {}
     cls.nodes_to_delete = []
     cls.handles_to_delete = []
     #         cls.setupTestData()
     print('\n\n=============== Starting tests ==================')
Example #20
 def __init__(self, config, services, logger=None):
     self.config = config
     self.logger = logger
     self.callback_url = os.environ['SDK_CALLBACK_URL']
     self.scratch = os.path.join(config['scratch'],
                                 'cuffdiff_merge_' + str(uuid.uuid4()))
     self.ws_url = config['workspace-url']
     self.services = services
     self.ws_client = Workspace(self.services['workspace_service_url'])
     self.dfu = DataFileUtil(self.callback_url)
     self.gfu = GenomeFileUtil(self.callback_url)
     self.rau = ReadsAlignmentUtils(self.callback_url)
     self.eu = ExpressionUtils(self.callback_url)
     self.deu = DifferentialExpressionUtils(self.callback_url)
     self.cuffmerge_runner = CuffMerge(config, logger)
     self.num_threads = mp.cpu_count()
     handler_utils._mkdir_p(self.scratch)
Example #21
    def setUpClass(cls):
        token = os.environ.get('KB_AUTH_TOKEN', None)
        config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('NarrativeService'):
            cls.cfg[nameval[0]] = nameval[1]
        authServiceUrl = cls.cfg.get(
            'auth-service-url',
            "https://kbase.us/services/authorization/Sessions/Login")
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'NarrativeService',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        # Set up test Workspace
        cls.ws_url = cls.cfg['workspace-url']
        cls.ws_client = Workspace(cls.ws_url, token=token)
        cls.test_ws_info = cls._make_workspace()
        cls.test_ws_name = cls.test_ws_info[1]
        # Build test data stuff.
        # 1. Make a fake reads object - test for report (should be null)
        cls.fake_reads_upa = cls._make_fake_reads(cls.test_ws_name,
                                                  "FakeReads")

        # 2. Make a report, give it that reads object - test for report, should find it
        cls.fake_report_upa = cls._make_fake_report(cls.fake_reads_upa,
                                                    cls.test_ws_name)

        cls.service_impl = NarrativeService(cls.cfg)
    def __init__(self, config, provenance):
        self.workspace_url = config['workspace-url']
        self.callback_url = os.environ['SDK_CALLBACK_URL']
        self.token = os.environ['KB_AUTH_TOKEN']
        self.provenance = provenance

        self.scratch = os.path.join(config['scratch'], str(uuid.uuid4()))
        _mkdir_p(self.scratch)

        if 'shock-url' in config:
            self.shock_url = config['shock-url']
        if 'handle-service-url' in config:
            self.handle_url = config['handle-service-url']

        self.ws_client = Workspace(self.workspace_url, token=self.token)
        self.kbr = KBaseReport(self.callback_url)

        self.genome_count_dir = os.path.join(self.scratch, str(uuid.uuid4()))
        _mkdir_p(self.genome_count_dir)
Example #23
    def setUpClass(cls):
        cls.test_dir = os.path.dirname(os.path.abspath(__file__))
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        cls.cfg['token'] = cls.token
        for nameval in config.items('NarrativeIndexer'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        # authServiceUrl = cls.cfg['auth-service-url']
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        # cls.ctx = MethodContext(None)
        # cls.ctx.update({'token': cls.token,
        #                'user_id': user_id,
        #                'provenance': [
        #                    {'service': 'NarrativeIndexer',
        #                     'method': 'please_never_use_it_in_production',
        #                     'method_params': []
        #                     }],
        #                'authenticated': 1})
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = Workspace(cls.wsURL)
        # Kafka
        cls.kserver = cls.cfg.get('kafka-server', 'kafka')
        cls.admin = AdminClient({'bootstrap.servers': cls.kserver})
        # create a random topic
        cls.topic = 'testevents-%d' % (randint(1, 10000))
        cls.cfg['kafka-topic'] = cls.topic
        cls.admin.delete_topics([cls.topic])
        new_topics = [
            NewTopic(cls.topic, num_partitions=1, replication_factor=1)
        ]
        cls.admin.create_topics(new_topics)

        # Create an instance
        cls.serviceImpl = NarrativeIndexer(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        cls.producer = Producer({'bootstrap.servers': cls.kserver})
Example #24
    def find_object_report(self, ctx, params):
        """
        find_object_report searches for a referencing report. All reports (if made properly) reference the objects
        that were created at the same time. To find that report, we search back up the reference chain.
        If the object in question was a copy, then there is no referencing report. We might still want to see it,
        though! If the original object is accessible, we'll continue the search from that object, and mark the
        associated object UPA in the return value.
        :param params: instance of type "FindObjectReportParams" (This first
           version only takes a single UPA as input and attempts to find the
           report that made it.) -> structure: parameter "upa" of String
        :returns: instance of type "FindObjectReportOutput" (report_upas: the
           UPAs for the report object. If empty list, then no report is
           available. But there might be more than one... object_upa: the UPA
           for the object that this report references. If the originally
           passed object was copied, then this will be the source of that
           copy that has a referencing report. copy_inaccessible: 1 if this
           object was copied, and the user can't see the source, so no
           report's available. error: if an error occurred while looking up
           (found an unavailable copy, or the report is not accessible), this
           will have a sensible string, more or less. Optional.) ->
           structure: parameter "report_upas" of list of String, parameter
           "object_upa" of String, parameter "copy_inaccessible" of type
           "boolean" (@range [0,1]), parameter "error" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN find_object_report
        report_fetcher = ReportFetcher(
            Workspace(self.workspaceURL, token=ctx["token"]))
        returnVal = report_fetcher.find_report_from_object(params['upa'])
        #END find_object_report

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method find_object_report return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
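A call sketch for find_object_report, assuming an implementation instance (impl), a token-bearing ctx, and a made-up UPA; the keys checked come from the docstring above.

out = impl.find_object_report(ctx, {'upa': '123/4/5'})[0]
if out.get('report_upas'):
    print('referencing report(s):', out['report_upas'])
elif out.get('copy_inaccessible'):
    print('copied object, source not readable:', out.get('error'))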
Example #25
    def remove_narratorial(self, ctx, params):
        """
        :param params: instance of type "RemoveNarratorialParams" ->
           structure: parameter "ws" of String
        :returns: instance of type "RemoveNarratorialResult" -> structure:
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN remove_narratorial
        if 'ws' not in params:
            raise ValueError(
                '"ws" field indicating WS name or id is required.')
        ws = Workspace(self.workspaceURL, token=ctx["token"])
        nu = NarratorialUtils()
        nu.remove_narratorial(params['ws'], ws)
        returnVal = {}
        #END remove_narratorial

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method remove_narratorial return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
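A matching call sketch, assuming impl and ctx as in the previous examples and a placeholder workspace name:

impl.remove_narratorial(ctx, {'ws': 'my_workspace'})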
Example #26
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('STAR'):
            cls.cfg[nameval[0]] = nameval[1]

        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)

        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'STAR',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.ws = Workspace(cls.wsURL, token=token)
        cls.serviceImpl = STAR(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
Example #27
    def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
                 context):
        self.scratch_dir = scratch_dir
        self.workspace_url = workspace_url
        self.callback_url = callback_url
        self.srv_wiz_url = srv_wiz_url
        self.provenance = context.provenance()
        self.job_id = None
        rpc_context = context.get('rpc_context')
        if rpc_context is not None and hasattr(rpc_context, 'get'):
            current_call_ctx = rpc_context.get('call_stack')
            if len(current_call_ctx):
                self.job_id = current_call_ctx[0].get('job_id')

        # from the provenance, extract out the version to run by exact hash if possible
        self.my_version = 'release'
        if len(self.provenance) > 0:
            if 'subactions' in self.provenance[0]:
                self.my_version = self.get_version_from_subactions(
                    'kb_BatchApp', self.provenance[0]['subactions'])
        print('Running kb_BatchApp version = ' + self.my_version)

        self.ws = Workspace(self.workspace_url)
        self.parallel_runner = KBParallel(self.callback_url, service_ver='dev')
Example #28
class WorkspaceAdminUtil:
    def __init__(self, config):
        wsurl = config.get('workspace-url')
        self.atoken = config.get('ws-admin-token')
        self.noadmin = False
        if self.atoken is None or self.atoken == '':
            self.noadmin = True
            self.atoken = config['token']
        self.ws = Workspace(wsurl, token=self.atoken)

    def list_objects(self, params):
        """
        Provide something that acts like a standard listObjects
        """
        if self.noadmin:
            return self.ws.list_objects(params)
        return self.ws.administer({'command': 'listObjects', 'params': params})

    def get_objects2(self, params):
        """
        Provide something that acts like a standard getObjects
        """
        if self.noadmin:
            return self.ws.get_objects2(params)
        return self.ws.administer({'command': 'getObjects', 'params': params})

    def get_workspace_info(self, params):
        """
        Provide something that acts like a standard getWorkspaceInfo
        """
        if self.noadmin:
            return self.ws.get_workspace_info(params)
        return self.ws.administer({
            'command': 'getWorkspaceInfo',
            'params': params
        })
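A usage sketch for WorkspaceAdminUtil: with 'ws-admin-token' unset the calls fall back to the caller's token and plain Workspace calls, otherwise they route through administer(). The config values here are placeholders.

cfg = {'workspace-url': 'https://kbase.us/services/ws',
       'ws-admin-token': '',          # empty -> noadmin fallback path
       'token': 'CALLER_TOKEN'}
wsadmin = WorkspaceAdminUtil(cfg)
objects = wsadmin.list_objects({'ids': [12345]})
ws_info = wsadmin.get_workspace_info({'id': 12345})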
class DiffExprMatrixUtils:
    """
    Contains a set of functions for expression level calculations.
    """

    PARAM_IN_WS_NAME = 'workspace_name'
    PARAM_IN_OBJ_NAME = 'output_obj_name'
    PARAM_IN_DIFFEXPMATSET_REF = 'diffExprMatrixSet_ref'

    def __init__(self, config, logger=None):
        self.config = config
        self.logger = logger
        self.scratch = os.path.join(config['scratch'], 'DEM_' + str(uuid.uuid4()))
        self.ws_url = config['workspace-url']
        self._mkdir_p(self.scratch)
        pass

    def _mkdir_p(self, path):
        """
        _mkdir_p: make directory for given path
        """
        if not path:
            return
        try:
            os.makedirs(path)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def process_params(self, params):
        """
        validates params passed to gen expression matrix method
        """
        for p in [self.PARAM_IN_DIFFEXPMATSET_REF]:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

    def get_expressionset_data(self, expressionset_ref):

        expr_set_obj = self.ws_client.get_objects2(
            {'objects': [{'ref': expressionset_ref}]})['data'][0]

        expr_set_obj_type = expr_set_obj.get('info')[2]
        expr_set_data = dict()
        expr_set_data['ws_name'] = expr_set_obj.get('info')[7]
        expr_set_data['obj_name'] = expr_set_obj.get('info')[1]

        if re.match(r'KBaseRNASeq.RNASeqExpressionSet-\d.\d', expr_set_obj_type):
            expr_set_data['genome_ref'] = expr_set_obj['data']['genome_id']
            expr_obj_refs = list()
            for expr_obj in expr_set_obj['data']['mapped_expression_ids']:
                expr_obj_refs.append(list(expr_obj.values())[0])
            expr_set_data['expr_obj_refs'] = expr_obj_refs

        elif re.match(r'KBaseSets.ExpressionSet-\d.\d', expr_set_obj_type):
            items = expr_set_obj.get('data').get('items')
            expr_obj_refs = list()
            for item in items:
                expr_obj_refs.append(item['ref'])
            expr_obj = self.ws_client.get_objects2(
                {'objects': [{'ref': expr_obj_refs[0]}]})['data'][0]
            expr_set_data['genome_ref'] = expr_obj['data']['genome_id']
            expr_set_data['expr_obj_refs'] = expr_obj_refs
        else:
            raise TypeError('expressionset_ref should be of type ' +
                            'KBaseRNASeq.RNASeqExpressionSet ' +
                            'or KBaseSets.ExpressionSet')
        return expr_set_data

    def get_diffexpr_matrixset(self, params, token):

        self.ws_client = Workspace(self.ws_url, token=token)

        col_names = {'gene_id': 'gene',
                     'log2_fold_change': 'log2fc_f',
                     'p_value': 'p_value_f',
                     'q_value': 'q_value'}

        json_fields = ['log2fc_f', 'p_value_f', 'q_value']

        self.process_params(params)

        diffexprmatset_list = list()
        diffexprmatset_ref = params.get(self.PARAM_IN_DIFFEXPMATSET_REF)

        diffexprmatset_obj = self.ws_client.get_objects2(
                                {'objects': [{'ref': diffexprmatset_ref}]})['data'][0]

        items = diffexprmatset_obj.get('data').get('items')
        diffexprmat_refs = list()

        for item in items:
            diffexprmat_refs.append(item['ref'])
            self.logger.info('DiffExprMatrix ref: ' + item['ref'])

        for diffexprmat_ref in diffexprmat_refs:
            diffexprmat_dict = dict()
            diffexprmat_obj = self.ws_client.get_objects2(
                                {'objects': [{'ref': diffexprmat_ref}]})['data'][0]
            diffexprmat = diffexprmat_obj.get('data')
            diffexprmat_dict['condition_1'] = list(diffexprmat.get('condition_mapping').keys())[0]
            diffexprmat_dict['condition_2'] = list(diffexprmat.get('condition_mapping').values())[0]
            voldata = list()
            data = diffexprmat.get('data')

            for row_index, row_id in enumerate(data.get('row_ids')):
                row_data = dict()
                row_data['gene'] = row_id
                values = data.get('values')[row_index]
                for col_index in range(len(values)):
                    row_data[json_fields[col_index]] = values[col_index]

                voldata.append(row_data)

            diffexprmat_dict['voldata'] = voldata
            diffexprmatset_list.append(diffexprmat_dict)

        return diffexprmatset_list
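An illustrative driver for the utility above; the config values, token, and set reference are assumptions, and a real logger is passed because get_diffexpr_matrixset logs each matrix ref.

import logging

dem = DiffExprMatrixUtils({'scratch': '/kb/module/work/tmp',
                           'workspace-url': 'https://kbase.us/services/ws'},
                          logger=logging.getLogger(__name__))
matrices = dem.get_diffexpr_matrixset({'diffExprMatrixSet_ref': '123/4/5'}, token)
for m in matrices:
    print(m['condition_1'], 'vs', m['condition_2'], '-', len(m['voldata']), 'rows')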
Example #30
class NarrativeManager:

    KB_CELL = 'kb-cell'
    KB_TYPE = 'type'
    KB_APP_CELL = 'kb_app'
    KB_FUNCTION_CELL = 'function_input'
    KB_OUTPUT_CELL = 'function_output'
    KB_ERROR_CELL = 'kb_error'
    KB_CODE_CELL = 'kb_code'
    KB_STATE = 'widget_state'

    DEBUG = False

    DATA_PALETTES_TYPES = DataPaletteTypes(False)

    def __init__(self, config, ctx, set_api_cache, dps_cache):
        self.narrativeMethodStoreURL = config['narrative-method-store']
        self.set_api_cache = set_api_cache  # DynamicServiceCache type
        self.dps_cache = dps_cache  # DynamicServiceCache type
        self.token = ctx["token"]
        self.user_id = ctx["user_id"]
        self.ws = Workspace(config['workspace-url'], token=self.token)
        self.intro_md_file = config['intro-markdown-file']
        # We switch DPs on only for internal Continuous Integration environment for now:
        if config['kbase-endpoint'].startswith("https://ci.kbase.us/"):
            self.DATA_PALETTES_TYPES = DataPaletteTypes(True)

    def list_objects_with_sets(self,
                               ws_id=None,
                               ws_name=None,
                               workspaces=None,
                               types=None,
                               include_metadata=0):
        if not workspaces:
            if (not ws_id) and (not ws_name):
                raise ValueError(
                    "One and only one of 'ws_id', 'ws_name', 'workspaces' " +
                    "parameters should be set")
            workspaces = [self._get_workspace_name_or_id(ws_id, ws_name)]
        return self._list_objects_with_sets(workspaces, types,
                                            include_metadata)

    def _list_objects_with_sets(self, workspaces, types, include_metadata):
        type_map = None
        if types is not None:
            type_map = {key: True for key in types}

        processed_refs = {}
        data = []
        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: processing sets")
        t1 = time.time()
        set_ret = self.set_api_cache.call_method(
            "list_sets", [{
                'workspaces': workspaces,
                'include_set_item_info': 1,
                'include_raw_data_palettes': 1,
                'include_metadata': include_metadata
            }], self.token)
        sets = set_ret['sets']
        dp_data = set_ret.get('raw_data_palettes')
        dp_refs = set_ret.get('raw_data_palette_refs')
        for set_info in sets:
            # Process
            target_set_items = []
            for set_item in set_info['items']:
                target_set_items.append(set_item['info'])
            if self._check_info_type(set_info['info'], type_map):
                data_item = {
                    'object_info': set_info['info'],
                    'set_items': {
                        'set_items_info': target_set_items
                    }
                }
                data.append(data_item)
                processed_refs[set_info['ref']] = data_item
        if self.DEBUG:
            print("    (time=" + str(time.time() - t1) + ")")

        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: loading ws_info")
        t2 = time.time()
        ws_info_list = []
        #for ws in workspaces:
        if len(workspaces) == 1:
            ws = workspaces[0]
            ws_id = None
            ws_name = None
            if str(ws).isdigit():
                ws_id = int(ws)
            else:
                ws_name = str(ws)
            ws_info_list.append(
                self.ws.get_workspace_info({
                    "id": ws_id,
                    "workspace": ws_name
                }))
        else:
            ws_map = {key: True for key in workspaces}
            for ws_info in self.ws.list_workspace_info({'perm': 'r'}):
                if ws_info[1] in ws_map or str(ws_info[0]) in ws_map:
                    ws_info_list.append(ws_info)
        if self.DEBUG:
            print("    (time=" + str(time.time() - t2) + ")")

        if self.DEBUG:
            print(
                "NarrativeManager._list_objects_with_sets: loading workspace objects"
            )
        t3 = time.time()
        for info in WorkspaceListObjectsIterator(
                self.ws,
                ws_info_list=ws_info_list,
                list_objects_params={'includeMetadata': include_metadata}):
            item_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
            if item_ref not in processed_refs and self._check_info_type(
                    info, type_map):
                data_item = {'object_info': info}
                data.append(data_item)
                processed_refs[item_ref] = data_item
        if self.DEBUG:
            print("    (time=" + str(time.time() - t3) + ")")

        if self.DEBUG:
            print(
                "NarrativeManager._list_objects_with_sets: processing DataPalettes"
            )
        t5 = time.time()
        if dp_data is None or dp_refs is None:
            dps = self.dps_cache
            dp_ret = dps.call_method("list_data",
                                     [{
                                         'workspaces': workspaces,
                                         'include_metadata': include_metadata
                                     }], self.token)
            dp_data = dp_ret['data']
            dp_refs = dp_ret['data_palette_refs']
        for item in dp_data:
            ref = item['ref']
            if self._check_info_type(item['info'], type_map):
                data_item = None
                if ref in processed_refs:
                    data_item = processed_refs[ref]
                else:
                    data_item = {'object_info': item['info']}
                    processed_refs[ref] = data_item
                    data.append(data_item)
                dp_info = {}
                if 'dp_ref' in item:
                    dp_info['ref'] = item['dp_ref']
                if 'dp_refs' in item:
                    dp_info['refs'] = item['dp_refs']
                data_item['dp_info'] = dp_info
        if self.DEBUG:
            print("    (time=" + str(time.time() - t5) + ")")
        return {"data": data, 'data_palette_refs': dp_refs}

    def _check_info_type(self, info, type_map):
        if type_map is None:
            return True
        obj_type = info[2].split('-')[0]
        return type_map.get(obj_type, False)

    def copy_narrative(self, newName, workspaceRef, workspaceId):
        time_ms = int(round(time.time() * 1000))
        newWsName = self.user_id + ':narrative_' + str(time_ms)
        # add the 'narrative' field to newWsMeta later.
        newWsMeta = {"is_temporary": "false", "narrative_nice_name": newName}

        # start with getting the existing narrative object.
        currentNarrative = self.ws.get_objects([{'ref': workspaceRef}])[0]
        if not workspaceId:
            workspaceId = currentNarrative['info'][6]
        # Let's prepare exceptions for clone the workspace.
        # 1) currentNarrative object:
        excluded_list = [{'objid': currentNarrative['info'][0]}]
        # 2) let's exclude objects of types under DataPalette handling:
        data_palette_type = "DataPalette.DataPalette"
        excluded_types = [data_palette_type]
        excluded_types.extend(self.DATA_PALETTES_TYPES.keys())
        add_to_palette_list = []
        dp_detected = False
        for obj_type in excluded_types:
            list_objects_params = {'type': obj_type}
            if obj_type == data_palette_type:
                list_objects_params['showHidden'] = 1
            for info in WorkspaceListObjectsIterator(
                    self.ws,
                    ws_id=workspaceId,
                    list_objects_params=list_objects_params):
                if obj_type == data_palette_type:
                    dp_detected = True
                else:
                    add_to_palette_list.append({
                        'ref':
                        str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
                    })
                excluded_list.append({'objid': info[0]})
        # clone the workspace EXCEPT for currentNarrative object + obejcts of DataPalette types:
        newWsId = self.ws.clone_workspace({
            'wsi': {
                'id': workspaceId
            },
            'workspace': newWsName,
            'meta': newWsMeta,
            'exclude': excluded_list
        })[0]
        try:
            if dp_detected:
                self.dps_cache.call_method(
                    "copy_palette", [{
                        'from_workspace': str(workspaceId),
                        'to_workspace': str(newWsId)
                    }], self.token)
            if len(add_to_palette_list) > 0:
                # There are objects in source workspace that have type under DataPalette handling
                # but these objects are physically stored in source workspace rather that saved
                # in DataPalette object. So they weren't copied by "dps.copy_palette".
                self.dps_cache.call_method("add_to_palette",
                                           [{
                                               'workspace': str(newWsId),
                                               'new_refs': add_to_palette_list
                                           }], self.token)

            # update the ref inside the narrative object and the new workspace metadata.
            newNarMetadata = currentNarrative['info'][10]
            newNarMetadata['name'] = newName
            newNarMetadata['ws_name'] = newWsName
            newNarMetadata['job_info'] = json.dumps({
                'queue_time': 0,
                'running': 0,
                'completed': 0,
                'run_time': 0,
                'error': 0
            })

            currentNarrative['data']['metadata']['name'] = newName
            currentNarrative['data']['metadata']['ws_name'] = newWsName
            currentNarrative['data']['metadata']['job_ids'] = {
                'apps': [],
                'methods': [],
                'job_usage': {
                    'queue_time': 0,
                    'run_time': 0
                }
            }
            # save the shiny new Narrative so it's at version 1
            newNarInfo = self.ws.save_objects({
                'id':
                newWsId,
                'objects': [{
                    'type': currentNarrative['info'][2],
                    'data': currentNarrative['data'],
                    'provenance': currentNarrative['provenance'],
                    'name': currentNarrative['info'][1],
                    'meta': newNarMetadata
                }]
            })
            # now, just update the workspace metadata to point
            # to the new narrative object
            newNarId = newNarInfo[0][0]
            self.ws.alter_workspace_metadata({
                'wsi': {
                    'id': newWsId
                },
                'new': {
                    'narrative': str(newNarId)
                }
            })
            return {'newWsId': newWsId, 'newNarId': newNarId}
        except:
            # let's delete copy of workspace so it's out of the way - it's broken
            self.ws.delete_workspace({'id': newWsId})
            raise  # continue raising previous exception

    def create_new_narrative(self, app, method, appparam, appData, markdown,
                             copydata, importData, includeIntroCell):
        if app and method:
            raise ValueError(
                "Must provide no more than one of the app or method params")

        if (not importData) and copydata:
            importData = copydata.split(';')

        if (not appData) and appparam:
            appData = []
            for tmp_item in appparam.split(';'):
                tmp_tuple = tmp_item.split(',')
                step_pos = None
                if tmp_tuple[0]:
                    try:
                        step_pos = int(tmp_tuple[0])
                    except ValueError:
                        pass
                appData.append([step_pos, tmp_tuple[1], tmp_tuple[2]])
        cells = None
        if app:
            cells = [{"app": app}]
        elif method:
            cells = [{"method": method}]
        elif markdown:
            cells = [{"markdown": markdown}]
        return self._create_temp_narrative(cells, appData, importData,
                                           includeIntroCell)

    def _get_intro_markdown(self):
        """
        Creates and returns a cell with the introductory text included.
        """
        # Load introductory markdown text
        with open(self.intro_md_file) as intro_file:
            intro_md = intro_file.read()
        return intro_md

    def _create_temp_narrative(self, cells, parameters, importData,
                               includeIntroCell):
        # Migration to python of JavaScript class from https://github.com/kbase/kbase-ui/blob/4d31151d13de0278765a69b2b09f3bcf0e832409/src/client/modules/plugins/narrativemanager/modules/narrativeManager.js#L414
        narr_id = int(round(time.time() * 1000))
        workspaceName = self.user_id + ':narrative_' + str(narr_id)
        narrativeName = "Narrative." + str(narr_id)

        ws = self.ws
        ws_info = ws.create_workspace({
            'workspace': workspaceName,
            'description': ''
        })
        newWorkspaceInfo = ServiceUtils.workspaceInfoToObject(ws_info)
        [narrativeObject, metadataExternal
         ] = self._fetchNarrativeObjects(workspaceName, cells, parameters,
                                         includeIntroCell)
        objectInfo = ws.save_objects({
            'workspace':
            workspaceName,
            'objects': [{
                'type':
                'KBaseNarrative.Narrative',
                'data':
                narrativeObject,
                'name':
                narrativeName,
                'meta':
                metadataExternal,
                'provenance': [{
                    'script':
                    'NarrativeManager.py',
                    'description':
                    'Created new ' + 'Workspace/Narrative bundle.'
                }],
                'hidden':
                0
            }]
        })[0]
        objectInfo = ServiceUtils.objectInfoToObject(objectInfo)
        self._completeNewNarrative(newWorkspaceInfo['id'], objectInfo['id'],
                                   importData)
        return {'workspaceInfo': newWorkspaceInfo, 'narrativeInfo': objectInfo}

    def _fetchNarrativeObjects(self, workspaceName, cells, parameters,
                               includeIntroCell):
        if not cells:
            cells = []
        # fetchSpecs
        appSpecIds = []
        methodSpecIds = []
        specMapping = {'apps': {}, 'methods': {}}
        for cell in cells:
            if 'app' in cell:
                appSpecIds.append(cell['app'])
            elif 'method' in cell:
                methodSpecIds.append(cell['method'])
        nms = NarrativeMethodStore(self.narrativeMethodStoreURL,
                                   token=self.token)
        if len(appSpecIds) > 0:
            appSpecs = nms.get_app_spec({'ids': appSpecIds})
            for spec in appSpecs:
                spec_id = spec['info']['id']
                specMapping['apps'][spec_id] = spec
        if len(methodSpecIds) > 0:
            methodSpecs = nms.get_method_spec({'ids': methodSpecIds})
            for spec in methodSpecs:
                spec_id = spec['info']['id']
                specMapping['methods'][spec_id] = spec
        # end of fetchSpecs
        metadata = {
            'job_ids': {
                'methods': [],
                'apps': [],
                'job_usage': {
                    'queue_time': 0,
                    'run_time': 0
                }
            },
            'format': 'ipynb',
            'creator': self.user_id,
            'ws_name': workspaceName,
            'name': 'Untitled',
            'type': 'KBaseNarrative.Narrative',
            'description': '',
            'data_dependencies': []
        }
        cellData = self._gatherCellData(cells, specMapping, parameters,
                                        includeIntroCell)
        narrativeObject = {
            'nbformat_minor': 0,
            'cells': cellData,
            'metadata': metadata,
            'nbformat': 4
        }
        metadataExternal = {}
        for key in metadata:
            value = metadata[key]
            if isinstance(value, basestring):
                metadataExternal[key] = value
            else:
                metadataExternal[key] = json.dumps(value)
        return [narrativeObject, metadataExternal]

    def _gatherCellData(self, cells, specMapping, parameters,
                        includeIntroCell):
        cell_data = []
        if includeIntroCell == 1:
            cell_data.append({
                'cell_type': 'markdown',
                'source': self._get_intro_markdown(),
                'metadata': {}
            })
        for cell_pos, cell in enumerate(cells):
            if 'app' in cell:
                cell_data.append(
                    self._buildAppCell(len(cell_data),
                                       specMapping['apps'][cell['app']],
                                       parameters))
            elif 'method' in cell:
                cell_data.append(
                    self._buildMethodCell(
                        len(cell_data), specMapping['methods'][cell['method']],
                        parameters))
            elif 'markdown' in cell:
                cell_data.append({
                    'cell_type': 'markdown',
                    'source': cell['markdown'],
                    'metadata': {}
                })
            else:
                raise ValueError("cannot add cell #" + str(cell_pos) +
                                 ", unrecognized cell content")
        return cell_data

    def _buildAppCell(self, pos, spec, params):
        cellId = 'kb-cell-' + str(pos) + '-' + str(uuid.uuid4())
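        # The generated cell is a plain markdown cell; the embedded <script> re-creates the
        # kbaseNarrativeAppCell widget on the emitted <div> when the narrative is rendered,
        # passing the app spec along as an escaped JSON string.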
        cell = {
            'cell_type':
            'markdown',
            'source':
            "<div id='" + cellId + "'></div>" + "\n<script>" + "$('#" +
            cellId + "').kbaseNarrativeAppCell({'appSpec' : '" +
            self._safeJSONStringify(spec) + "', 'cellId' : '" + cellId +
            "'});" + "</script>",
            'metadata': {}
        }
        cellInfo = {}
        widgetState = []
        cellInfo[self.KB_TYPE] = self.KB_APP_CELL
        cellInfo['app'] = spec
        if params:
            steps = {}
            for param in params:
                stepid = 'step_' + str(param[0])
                if stepid not in steps:
                    steps[stepid] = {}
                    steps[stepid]['inputState'] = {}
                steps[stepid]['inputState'][param[1]] = param[2]
            state = {
                'state': {
                    'step': steps
                }
            }
            widgetState.append(state)
        cellInfo[self.KB_STATE] = widgetState
        cell['metadata'][self.KB_CELL] = cellInfo
        return cell

    def _buildMethodCell(self, pos, spec, params):
        cellId = 'kb-cell-' + str(pos) + '-' + str(uuid.uuid4())
        cell = {
            'cell_type':
            'markdown',
            'source':
            "<div id='" + cellId + "'></div>" + "\n<script>" + "$('#" +
            cellId + "').kbaseNarrativeMethodCell({'method' : '" +
            self._safeJSONStringify(spec) + "'});" + "</script>",
            'metadata': {}
        }
        cellInfo = {'method': spec, 'widget': spec['widgets']['input']}
        cellInfo[self.KB_TYPE] = self.KB_FUNCTION_CELL
        widgetState = []
        if params:
            wparams = {}
            for param in params:
                wparams[param[1]] = param[2]
            widgetState.append({'state': wparams})
        cellInfo[self.KB_STATE] = widgetState
        cell['metadata'][self.KB_CELL] = cellInfo
        return cell

    def _completeNewNarrative(self, workspaceId, objectId, importData):
        self.ws.alter_workspace_metadata({
            'wsi': {
                'id': workspaceId
            },
            'new': {
                'narrative': str(objectId),
                'is_temporary': 'true'
            }
        })
        # copy_to_narrative:
        if not importData:
            return
        objectsToCopy = [{'ref': x} for x in importData]
        infoList = self.ws.get_object_info_new({
            'objects': objectsToCopy,
            'includeMetadata': 0
        })
        for item in infoList:
            objectInfo = ServiceUtils.objectInfoToObject(item)
            self.copy_object(objectInfo['ref'], workspaceId, None, None,
                             objectInfo)

    def _safeJSONStringify(self, obj):
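        # Quotes are escaped recursively (see _safeJSONStringifyPrepare below) so the
        # serialised spec can sit inside the quoted widget call emitted by
        # _buildAppCell / _buildMethodCell without breaking the generated HTML/JS.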
        return json.dumps(self._safeJSONStringifyPrepare(obj))

    def _safeJSONStringifyPrepare(self, obj):
        if isinstance(obj, basestring):
            return obj.replace("'", "&apos;").replace('"', "&quot;")
        elif isinstance(obj, list):
            for pos in range(len(obj)):
                obj[pos] = self._safeJSONStringifyPrepare(obj[pos])
        elif isinstance(obj, dict):
            obj_keys = list(obj.keys())
            for key in obj_keys:
                obj[key] = self._safeJSONStringifyPrepare(obj[key])
        else:
            pass  # it's boolean/int/float/None
        return obj

    def _get_workspace_name_or_id(self, ws_id, ws_name):
        ret = ws_name
        if not ret:
            ret = str(ws_id)
        return ret

    def copy_object(self, ref, target_ws_id, target_ws_name, target_name,
                    src_info):
        # There should be some logic related to DataPalettes
        if (not target_ws_id) and (not target_ws_name):
            raise ValueError("Neither target workspace ID nor name is defined")
        if not src_info:
            src_info_tuple = self.ws.get_object_info_new({
                'objects': [{
                    'ref': ref
                }],
                'includeMetadata':
                0
            })[0]
            src_info = ServiceUtils.objectInfoToObject(src_info_tuple)
        type_name = src_info['typeModule'] + '.' + src_info['typeName']
        type_config = self.DATA_PALETTES_TYPES.get(type_name)
        if type_config is not None:
            # Copy with DataPaletteService
            if target_name:
                raise ValueError(
                    "'target_name' cannot be defined for DataPalette copy")
            target_ws_name_or_id = self._get_workspace_name_or_id(
                target_ws_id, target_ws_name)
            self.dps_cache.call_method("add_to_palette",
                                       [{
                                           'workspace': target_ws_name_or_id,
                                           'new_refs': [{
                                               'ref': ref
                                           }]
                                       }], self.token)
            return {'info': src_info}
        else:
            if not target_name:
                target_name = src_info['name']
            obj_info_tuple = self.ws.copy_object({
                'from': {
                    'ref': ref
                },
                'to': {
                    'wsid': target_ws_id,
                    'workspace': target_ws_name,
                    'name': target_name
                }
            })
            obj_info = ServiceUtils.objectInfoToObject(obj_info_tuple)
            return {'info': obj_info}

    def list_available_types(self, workspaces):
        data = self.list_objects_with_sets(workspaces=workspaces)['data']
        type_stat = {}
        for item in data:
            info = item['object_info']
            obj_type = info[2].split('-')[0]
            if obj_type in type_stat:
                type_stat[obj_type] += 1
            else:
                type_stat[obj_type] = 1
        return {'type_stat': type_stat}
class MutualInfoUtil:
    def __init__(self, config):
        self.ws_url = config["workspace-url"]
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.shock_url = config['shock-url']
        self.dfu = DataFileUtil(self.callback_url)
        self.ws = Workspace(self.ws_url, token=self.token)
        self.scratch = config['scratch']

    def _mkdir_p(self, path):
        """
		_mkdir_p: make directory for given path
		"""
        if not path:
            return
        try:
            os.makedirs(path)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def test_dfu(self):
        output_directory = self.scratch
        #output_directory = "/kb/module/test1/"
        #os.mkdir(output_directory)
        #self._mkdir_p(output_directory)

        test_file = os.path.join(output_directory, 'index.html')
        with open(test_file, 'w') as file:
            file.write("test!")
        print("OUTPUT DIR")
        print(output_directory)
        print(os.listdir(output_directory))
        print("file_to_shock")
        report_shock_id = self.dfu.file_to_shock({
            'file_path': output_directory,
            'pack': 'targz'
            #'pack': 'zip'
        })
        print(report_shock_id)
        return

    def _validate_run_flux_mutual_information_analysis_params(self, params):
        """
		_validate_run_flux_mutual_information_analysis_params:
				validates params passed to run_flux_mutual_information_analysis method
		"""

        log('start validating run_flux_mutual_information_analysis params')

        # check for required parameters
        for p in ['fbamodel_id', 'compounds', 'media_id', 'workspace_name']:
            if p not in params:
                raise ValueError(
                    '"{}" parameter is required, but missing'.format(p))

    def _get_file_from_ws(self, ref):
        try:
            file_path = self.ws.get_objects2({'objects': [{'ref': ref}]})
            file_path = file_path['data'][0]
        except Exception as e:
            raise ValueError('Unable to get object from workspace: (' + ref +
                             ')' + str(e))
        return file_path

    def _make_media_files(self, ws_name, base, compounds):
        """
		Build and store media objects for each combination of compound added to the base media.
		:param ws_name: workspace that contains the base media object
		:param base: The base media object (name or reference)
		:param compounds: the set of compounds to test
		:return: A list of media ids and a matrix with each media combination defined
		"""

        ref = ws_name + "/" + base
        if base.find("/") != -1:
            ref = base

        output = self._get_file_from_ws(ref)
        base_media = output['data']
        base = output['info'][1]
        myuuid = str(uuid.uuid4())
        media_ids = [base]
        new_media_list = []
        media_matrix = [[""] + compounds]
        media_matrix.append([base] + [0] * len(compounds))
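        # Illustrative layout of media_matrix for compounds ['cpdA', 'cpdB'] (hypothetical ids):
        #   ['', 'cpdA', 'cpdB']     <- header row
        #   ['base', 0, 0]           <- unmodified base media
        #   ['base_v2', 1, 0]
        #   ['base_v3', 0, 1]
        #   ['base_v4', 1, 1]        <- one row per non-empty compound combination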
        for n_comp in range(1, len(compounds) + 1):
            for combo in combinations(compounds, n_comp):
                new_media_id = base + '_v%s' % len(media_matrix)
                media_ids.append(new_media_id)
                media_matrix.append(
                    [new_media_id] +
                    [1 if comp in combo else 0 for comp in compounds])
                new_media = deepcopy(base_media)
                new_media['id'] = new_media_id
                new_media['name'] = new_media_id
                for new_comp in combo:
                    new_media['mediacompounds'].append({
                        'compound_ref': '48/1/1/compounds/id/%s' % new_comp.split('_')[0],
                        'concentration': 1.0,
                        'maxFlux': 1000,
                        'minFlux': -1000
                    })
                new_media_list.append(new_media)

        print("Made %s Media Files" % (len(media_ids) - 1))
        info = self.ws.save_objects({
            'workspace': ws_name,
            "objects": [{
                "hidden": 1,
                "type": "KBaseBiochem.Media",
                "data": media,
                "name": myuuid + "-" + media['name']
            } for media in new_media_list]
        })
        #print(info)
        return media_ids, media_matrix, myuuid

    def _run_fba(self, workspace_name, media_id_list, fbamodel_id, myuuid,
                 base_media):
        print('running fba')
        fba_tool_obj = fba_tools(self.callback_url, service_ver='dev')
        new_media_list = []
        for media in media_id_list:
            if media == base_media:
                new_media_list.append(workspace_name + "/" + media)
            else:
                new_media_list.append(workspace_name + "/" + myuuid + "-" +
                                      media)

        fba_tool_obj.run_flux_balance_analysis({
            "max_c_uptake": 60,  # previously defaulted to 6; later raised to 60
            "workspace": workspace_name,
            "fbamodel_id": fbamodel_id,
            "fba_output_id": fbamodel_id + ".mifba",
            "fbamodel_workspace": workspace_name,
            "media_id_list": new_media_list,
            "target_reaction": "bio1",
            "minimize_flux": 1
        })
        output = self.ws.get_objects2({
            'objects': [{
                'ref': workspace_name + "/" + fbamodel_id + '.mifba'
            }]
        })

        #json.dump(output, open(self.scratch+'/fba.json', 'w'))

        fba = output['data'][0]['data']
        biomass_data = "FBAs,Biomass\n"
        secretion_file = "," + ','.join(media_id_list) + "\n"
        full_secretion_file = "," + ','.join(media_id_list) + "\n"
        full_flux_file = "," + ','.join(media_id_list) + "\n"
        flux_file = "," + ','.join(media_id_list) + "\n"
        objectives = fba['other_objectives']
        for i in range(0, len(objectives)):
            biomass_data = biomass_data + media_id_list[i] + "," + str(
                objectives[i]) + "\n"

        flux_vars = fba['FBAReactionVariables']
        for var in flux_vars:
            id = var['modelreaction_ref'].split("/").pop()
            flux_file = flux_file + id
            full_flux_file = full_flux_file + id
            fluxes = var['other_values']
            for i in range(0, len(objectives)):
                if objectives[i] == 0:
                    full_flux_file = full_flux_file + ",0"
                    flux_file = flux_file + ",0"
                else:
                    full_flux_file = full_flux_file + "," + str(fluxes[i])
                    if abs(fluxes[i]) < 1e-7:
                        flux_file = flux_file + ",0"
                    else:
                        flux_file = flux_file + ",1"
            flux_file = flux_file + "\n"
            full_flux_file = full_flux_file + "\n"

        secretion_vars = fba['FBACompoundVariables']
        for var in secretion_vars:
            id = var['modelcompound_ref'].split("/").pop()
            secretion_file = secretion_file + id
            full_secretion_file = full_secretion_file + id
            fluxes = var['other_values']
            for i in range(0, len(objectives)):
                if objectives[i] == 0:
                    full_secretion_file = full_secretion_file + ",0"
                    secretion_file = secretion_file + ",0"
                else:
                    full_secretion_file = full_secretion_file + "," + str(
                        fluxes[i])
                    if abs(fluxes[i]) < 1e-7:
                        secretion_file = secretion_file + ",0"
                    elif fluxes[i] < 0:
                        secretion_file = secretion_file + ",-1"
                    else:
                        secretion_file = secretion_file + ",1"
            secretion_file = secretion_file + "\n"
            full_secretion_file = full_secretion_file + "\n"

        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(output_directory)
        biomass_path = os.path.join(output_directory, 'biomass.csv')
        secretion_path = os.path.join(output_directory, 'secretion.csv')
        flux_path = os.path.join(output_directory, 'flux.csv')
        full_secretion_path = os.path.join(output_directory,
                                           'full_secretion.csv')
        full_flux_path = os.path.join(output_directory, 'full_flux.csv')

        with open(biomass_path, 'w') as biomass_f:
            biomass_f.write(biomass_data)

        with open(secretion_path, 'w') as secretion_f:
            secretion_f.write(secretion_file)

        with open(flux_path, 'w') as flux_f:
            flux_f.write(flux_file)

        with open(full_secretion_path, 'w') as full_secretion_f:
            full_secretion_f.write(full_secretion_file)

        with open(full_flux_path, 'w') as full_flux_f:
            full_flux_f.write(full_flux_file)

        return [
            biomass_path, secretion_path, flux_path, full_secretion_path,
            full_flux_path
        ]

    def _make_index_html(self, result_file_path, mutual_info_dict):
        overview_content = ''
        overview_content += '<table>'
        overview_content += '<tr><th colspan="2">Mutual Information for various chemical compound combinations</th></tr>'
        overview_content += '<tr><th>Input Chemical Compound Combination</th>'
        overview_content += '<th>Mutual Information (in Bits)</th>'
        overview_content += '</tr>'

        for k, v in mutual_info_dict.items():
            overview_content += '<tr><td>{}</td><td>{}</td></tr>'.format(k, v)
        overview_content += '</table>'
        with open(result_file_path, 'w') as result_file:
            with open(
                    os.path.join(os.path.dirname(__file__),
                                 'report_template.html'),
                    'r') as report_template_file:
                report_template = report_template_file.read()
                report_template = report_template.replace(
                    '<p>Overview_Content</p>', overview_content)
                result_file.write(report_template)
        return

    def _generate_html_report(self, result_directory, mutual_info_dict):
        """
		_generate_html_report: generate html summary report
		"""
        # scratch dir, uuid, DataFileUtil.file_to_shock, shock_id, extended report

        log('start generating html report')

        html_report = list()

        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))

        self._mkdir_p(output_directory)
        result_file_path = os.path.join(output_directory,
                                        'mutual_information_report.html')

        shutil.copy(os.path.join(result_directory, 'MI_plot.png'),
                    os.path.join(output_directory, 'MI_plot.png'))

        overview_content = ''
        overview_content += '<table>'
        overview_content += '<tr><th colspan="2">Mutual Information for various chemical compound combinations</th></tr>'
        overview_content += '<tr><th>Input Chemical Compound Combination</th>'
        overview_content += '<th>Mutual Information (in Bits)</th>'
        overview_content += '</tr>'

        for k, v in mutual_info_dict.items():
            overview_content += '<tr><td>{}</td><td>{}</td></tr>'.format(k, v)
        overview_content += '</table>'

        with open(result_file_path, 'w') as result_file:
            with open(
                    os.path.join(os.path.dirname(__file__),
                                 'report_template.html'),
                    'r') as report_template_file:
                report_template = report_template_file.read()
                report_template = report_template.replace(
                    '<p>Overview_Content</p>', overview_content)
                result_file.write(report_template)

        report_shock_id = self.dfu.file_to_shock({
            'file_path': output_directory,
            'pack': 'targz'
        })['shock_id']

        #report_shock_id = self.dfu.file_to_shock({'file_path': output_directory,
        #										  'pack': 'zip'})['shock_id']

        html_report.append({
            'shock_id': report_shock_id,
            'name': os.path.basename(result_file_path),
            'label': os.path.basename(result_file_path),
            'description': 'HTML summary report for Mutual Information App'
        })

        return html_report

    def _generate_report(self, result_directory, mutual_info_dict,
                         workspace_name):
        """
		_generate_report: generate summary report
		"""
        print('-->I am here *************')
        uuidStr = str(uuid.uuid4())
        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(output_directory)
        test_file = os.path.join(output_directory, "index.html")
        self._make_index_html(test_file, mutual_info_dict[1])
        #shutil.copy2(os.path.join(os.path.dirname(__file__), 'data', 'index.html'), output_directory)

        # shutil.copy('/kb/module/data/index.html', result_directory + '/' + uuidStr + '/index.html')
        json.dump(mutual_info_dict[0],
                  open(os.path.join(output_directory, 'pdata.json'), 'w'))
        #shutil.copy('pdata.json', result_directory + '/' + uuidStr + '/pdata.json')

        # DataFileUtils to shock
        print(output_directory)
        print(os.listdir(output_directory))
        report_shock_result = self.dfu.file_to_shock({
            'file_path': output_directory,
            'pack': 'targz'
        })
        #report_shock_result = self.dfu.file_to_shock({'file_path': output_directory,
        #											 'pack': 'zip'})

        report_shock_id = report_shock_result['shock_id']
        print(report_shock_result)

        report_file = {
            'name': 'index.html',
            'description': 'the report',
            'shock_id': report_shock_id
        }
        log('creating report')
        #output_html_files = self._generate_html_report(result_directory,
        #											   mutual_info_dict)
        report_params = {
            'message': '',
            'workspace_name': workspace_name,
            'html_links': [report_file],
            'file_links': [],
            'direct_html_link_index': 0,
            'html_window_height': 333,
            'report_object_name': 'MutualInfomation_report_' + uuidStr
        }

        kbase_report_client = KBaseReport(self.callback_url)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {
            'report_name': output['name'],
            'report_ref': output['ref']
        }

        return report_output

######### ALL THREE MUTUAL INFORMATION CALCULATIONS START FROM HERE #############

    def _generate_mutual_info(self, media_matrix, fba_file, mi_options):

        #print('this is fba_file')
        #print(fba_file)
        df1 = pd.read_csv(fba_file[0])
        df1.values

        #df1.as_matrix()
        #print('-->printing df1')# **** rm
        #print(df1.to_string())# **** rm
        #print(type(df1))  # **** rm
        #print('-->printing media_matrix')
        #print(media_matrix)

        df3 = pd.DataFrame(columns=media_matrix[0][1:])
        for i in range(1, len(media_matrix)):
            # each row is a flat [media_id, 0/1, ...] list (see _make_media_files)
            df3.loc[media_matrix[i][0]] = media_matrix[i][1:]

        #print('-->*************OK')
        #print(df3)

        #----Input validation of Media/FBAs with Binary Matrix FBAs------
        # 1.0 Number of rows in the Media.csv file = (Number of columns - 1)
        #   1.0. If they differ: throw an ERROR about a mismatched number of FBAs between the media and the binary matrix.
        # 1.1 Check that the elements in the Media.csv file contain only binary values (i.e. 0 and 1)
        #   1.1. If other values are present: throw an ERROR about inappropriate input values.
        # 1.2 Check that the compounds in the Media.csv file match the number of FBAs
        #   1.2. If the compounds do not match the number of FBAs: throw an ERROR about inappropriate input values.

        media_matrix = df3
        s_df1 = df1.shape
        s_df2 = media_matrix.shape
        #print(media_matrix,type(media_matrix))

        Temp_df2 = np.array(media_matrix.values)
        #print('-->******')
        #print(Temp_df2)
        # Create matrix with only the elements remove first column and all the rows
        Temp_df2 = Temp_df2[0:, 1:]

        Bin_val_check = np.array_equal(Temp_df2, Temp_df2.astype(bool))
        #num_compounds = (s_df2[1])-1
        num_compounds = s_df2[1]

        if ((s_df1[1] - 1) != s_df2[0]) or (not Bin_val_check) or (
                int(math.log(s_df2[0], 2)) != num_compounds):
            raise ValueError(
                'Invalid input: media matrix and FBA matrix are inconsistent '
                '(mismatched number of FBAs, non-binary values, or wrong number of compounds)')

        #-----All possible combination of the chemical compounds----------------------
        # 2.0 Separating m0 from the rest of the labels

        Temp1_df2 = media_matrix
        #print('-->*************OK')
        #print(Temp1_df2)
        cols = Temp1_df2.columns
        for i in range(0, len(cols)):
            Temp1_df2.loc[Temp1_df2[cols[i]] == 1, cols[i]] = cols[i]
        #print('-->*************OK')
        #print (Temp1_df2)

        # 2.1 Creating a dictionary for all FBAs except m0
        #print(len(Temp1_df2))
        #print('--->*********')
        #print(Temp1_df2)

        mydict = {}
        for x in range(0, len(Temp1_df2)):
            for i in range(0, s_df2[1]):
                currentvalue = Temp1_df2.iloc[x, i]
                currentid = Temp1_df2.index[x]
                mydict.setdefault(currentid, [])
                if currentvalue != 0:
                    mydict[currentid].append(currentvalue)
                # Add the first key as m0
        media_0_name = Temp1_df2.index[0]
        mydict[media_0_name] = ["0"]
        # Sort the keys
        mydict = collections.OrderedDict(natsort.natsorted(mydict.items()))
        #print ('--> ********')
        compoundslist = Temp1_df2.columns.tolist()
        #print(compoundslist)
        #print('all possible combination')
        #print(len(compoundslist))

        # List of Compounds combination in the list
        my_combi_list = []
        for L in range(0, len(compoundslist) + 1):
            for subset in itertools.combinations(compoundslist, L):
                my_combi_list.append(list(subset))

        my_combi_list[0] = [0]
        # print(my_combi_list)
        '''
		for k, v in mydict.iteritems():
			#print('--> ********')
			print(k, v)
		'''

        # Created a dictionary where the keys:
        # list of compounds combination
        # values are corresponding FBAs list in df2
        result_dict = {}
        for element in my_combi_list[1:]:
            for k, v in mydict.iteritems():
                if set(v).issubset(set(map(lambda x: str(x), element))):
                    key = ','.join(map(lambda x: str(x), element))
                    if result_dict.get(key):
                        media_list = result_dict[key]
                        media_list.append(k)
                        media_list = list(set(media_list))
                        result_dict.update({key: media_list})
                    else:
                        result_dict.update({key: [media_0_name, k]})

        # Sort the keys
        result_dict['0'] = [media_0_name]
        result_dict = collections.OrderedDict(
            natsort.natsorted(result_dict.items()))
        # print(result_dict)
        #print('-->I am here **** OK')
        #print(result_dict)
        #print (df1)

        # Created a dictionary where the keys are:
        # list of compounds combination
        # values are compounds combination FBAs with df1 vaules
        All_Comp_Combi_dic = {}
        for column, value in result_dict.items():
            All_Comp_Combi_dic.update({column: df1.get(value)})

        # print('-->All_Comp_Combi_dic******')
        # print (All_Comp_Combi_dic)
        # print(result_dict)

        # To print an item from the All_Comp_Combi_dic
        df = (pd.DataFrame(All_Comp_Combi_dic.items()))
        #print('--> printing df')
        #print(df[0].to_string())
        #print(df[1][7])

        ######### INTRACELLULAR FLUX MUTUAL INFORMATION CALCULATION #############
        if mi_options == "flux":
            print('Intracellular flux')
            MI_dict = {}
            for k in range(0, len(df[0])):
                drop_rows_df = df[1][k].drop_duplicates(keep="first")
                drop_columns_df = drop_rows_df.T.drop_duplicates(
                    keep="first").T
                remove = []
                removed = {}
                count_values = {}
                cols = df[1][k].columns
                for i in range(len(cols) - 1):
                    duplicated = []
                    v = df[1][k][cols[i]].values
                    for j in range(i + 1, len(cols)):
                        if np.array_equal(v, df[1][k][cols[j]].values):
                            remove.append(cols[j])
                            duplicated.append(cols[j])
                    if duplicated and cols[i] not in remove:
                        removed.update({cols[i]: duplicated})
                    count = {}
                    for key, value in removed.items():
                        count.update({key: len(value)})

                    #print v

                    # print drop_columns_df
                    count_values = count.values()
                    # print count_values
                    count_values = map(lambda x: x + 1, count_values)
                    # print count_values
                    d = {x: count_values.count(x) for x in count_values}
                #print('-->count_values')
                #print(count_values)

                #------- Mutual Information (MI) calculation -------------
                FBAs = len(df[1][k].columns)
                pure_entropy = math.log(FBAs, 2)
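                # What the block below computes, in formula form:
                #   MI(media ; flux pattern) = H(media) - H(media | flux pattern)
                #   H(media)        = log2(N)  for N equally likely media ("pure entropy")
                #   H(media | flux) = sum over groups g of (|g|/N) * log2(|g|)
                # where a "group" is a set of media whose flux columns are identical.
                # e.g. N = 8 media collapsing into groups of sizes 4, 2, 1, 1 gives
                #   MI = 3 - ((4/8)*2 + (2/8)*1) = 1.75 bits.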
                #print (pure_entropy) (-->ok rm)

                # If No duplicates exist and list "value" is empty
                if not count_values:
                    #print("List is empty")
                    No_duplicate_FBAs = len(drop_columns_df.columns)
                    conditional_entropy = -1 * (No_duplicate_FBAs * (
                        (1 / No_duplicate_FBAs) *
                        ((1 / 1) * math.log(1.0 / 1.0, 2))))
                    Mutual_Info = pure_entropy - conditional_entropy
                    #print('Mutual Info:', Mutual_Info)

                if count_values:
                    # If duplicates exist and list "value" is not empty
                    conditional_entropy = 0
                    for key in d:
                        #print key, d[key]
                        Temp = -1 * d[key] * (key / float(FBAs)) * key * (
                            1.0 / key) * math.log(1.0 / key, 2)
                        conditional_entropy = Temp + conditional_entropy
                    #print "%3f" %Temp
                    Mutual_Info = pure_entropy - conditional_entropy

                MI_dict[df[0][k]] = Mutual_Info
                MI_dict['0'] = 0.0

            #Sorted MI_dict
            MI_dict = sorted(MI_dict.items(), key=lambda x: (-len(x[0]), x[0]))
            MI_dict = OrderedDict(MI_dict)
            #print(MI_dict)

            #print('-->rest')
            #print(compoundslist)
            #print(num_compounds)

            x_groups = [[] for x in range(num_compounds)]
            y_groups = [[] for x in range(num_compounds)]
            names = [[] for x in range(num_compounds)]
            Comp_Mapping = [[] for x in range(num_compounds)]

            for key, val in MI_dict.iteritems():
                del_count = key.count(',')
                x_groups[del_count].append(key)
                y_groups[del_count].append(val)

                # for x, y in zip(x_groups, y_groups):
                # data.append(go.Bar(x=x, y=y, name='test'))

            pdata = []
            for i in range(0, len(x_groups)):
                names[i] = str(i + 1) + ' Compound Combination'
                Comp_Mapping = str(i + 1) + '-' + compoundslist[i]

                record = {}
                record["x"] = []
                for e in x_groups[i]:
                    record["x"].append("c" + e)
                record["y"] = y_groups[i]
                record["names"] = names[i]
                record["Comp_Mapping"] = Comp_Mapping
                pdata.append(record)

            #print (pdata)
            #json.dump(pdata, open(self.scratch+'/pdata.json', 'w'))
            return [pdata, MI_dict]
            #return MI_dict

######### INPUT COMPONENTS AND BIOMASS FLUX MUTUAL INFORMATION CALCULATION #############
        if mi_options == "biomass":
            # Load the file containing the information for each FBA (media) and its corresponding biomass (growth)
            print('biomass flux')
            df2 = pd.read_csv(fba_file[1])
            df2.values
            #print(df)

            MI_dict_biomass = {}
            for r in range(0, len(df[0])):
                reaction_states = df[1][r].head(1000)

                def get_groups(flux_df):
                    groups = collections.defaultdict(list)
                    unique = flux_df.aggregate(lambda x: hash(str(x.values)))
                    for k, v in unique[0:].iteritems():
                        groups[v].append(k)
                    return dict([(i, g)
                                 for i, g in enumerate(groups.values())])
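                # get_groups buckets the media (columns) whose entire flux vectors hash to the
                # same value, i.e. media that are indistinguishable from this FBA read-out;
                # the returned dict maps an arbitrary group index to the list of media labels.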

                n_group = collections.defaultdict(int)
                groups = get_groups(reaction_states)

                for group in groups.values():
                    n_group[len(group)] += 1

                groups_count = {}
                for key, values in groups.items():
                    groups_count[key] = len(values)
                    # print groups_count

                # Take first FBA label of every group
                group_id = {}
                for k, v in groups.items():
                    group_id.update({k: groups.values()[k][0]})

                # Obtain the Biomass of each Group
                cols_df = group_id.values()
                cols_df2 = df2.columns
                #print (cols_df)

                # Dictionary of first FBA label of every group and its corresponding number of members
                groups_label_count = {}
                for k, v in groups_count.items():
                    groups_label_count.update({cols_df[k]: v})
                #print('groups_label_count')
                #print(groups_label_count)

                def get_cond_count(re_group):
                    media_cond = 0
                    for media in re_group['FBAs']:
                        media_cond += groups_label_count[media]
                    return media_cond

                # Extract FBA Groups biomass inside df2
                Groups_Biomass = df2[df2['FBAs'].isin(cols_df)]
                #print('-->I am here')
                #print(Groups_Biomass)

                # Regroup based on the biomass values
                re_group = Groups_Biomass.groupby('Biomass')
                biomass_FBAs_groups = re_group.aggregate(get_cond_count)

                biomass_FBAs_label_groups = Groups_Biomass.groupby(
                    "Biomass", sort=True).sum()
                #print(biomass_FBAs_label_groups)

                #print (biomass_FBAs_label_groups)

                Summery = pd.merge(left=biomass_FBAs_label_groups,
                                   left_index=True,
                                   right=biomass_FBAs_groups,
                                   right_index=True,
                                   how='inner')
                Data_4_CondMI = Summery.groupby('FBAs_y').count()
                Data_4_CondMI = Data_4_CondMI.to_dict(orient='dict')
                for k, v in Data_4_CondMI.items():
                    Data_4_CondMI = v

                Num_of_FBAs = Data_4_CondMI.keys()
                Count_Num_of_FBAs = Data_4_CondMI.values()

                # ------- Mutual Information (MI) calculation, Stage II (input compounds with respect to BIOMASS) -------------
                # Pure Entropy
                FBAs = len(df[1][r].columns)
                pure_entropy = math.log(FBAs, 2)
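                # Same MI formula as the intracellular-flux case:
                #   MI = log2(N) - sum over groups g of (|g|/N) * log2(|g|)
                # except that the groups are merged further by identical biomass values.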

                conditional_entropy = 0.0
                for l in range(0, len(Count_Num_of_FBAs)):
                    temp = -1 * Count_Num_of_FBAs[l] * (
                        Num_of_FBAs[l] / float(FBAs)) * Num_of_FBAs[l] * (
                            1.0 / float(Num_of_FBAs[l]) *
                            (math.log(1.0 / float(Num_of_FBAs[l]), 2)))
                    conditional_entropy += temp

                Mutual_Info_Biomass = pure_entropy - conditional_entropy
                # print('Mutual Info:', Mutual_Info_Biomass)

                #print(Mutual_Info_Biomass)
                MI_dict_biomass.update({df[0][r]: Mutual_Info_Biomass})

                #print(MI_dict_biomass)

            # Sorted MI_dict_biomass
            MI_dict_biomass = sorted(MI_dict_biomass.items(),
                                     key=lambda x: (-len(x[0]), x[0]))
            MI_dict_biomass = OrderedDict(MI_dict_biomass)

            #print(MI_dict_biomass)

            x_groups = [[] for x in range(num_compounds)]
            y_groups = [[] for x in range(num_compounds)]
            names = [[] for x in range(num_compounds)]
            Comp_Mapping = [[] for x in range(num_compounds)]

            for key, val in MI_dict_biomass.iteritems():
                del_count = key.count(',')
                x_groups[del_count].append(key)
                y_groups[del_count].append(val)

            pdata = []
            for i in range(0, len(x_groups)):
                names[i] = str(i + 1) + ' Compound Combination'
                Comp_Mapping = str(i + 1) + '-' + compoundslist[i]

                record = {}
                record["x"] = []
                for e in x_groups[i]:
                    record["x"].append("c" + e)
                record["y"] = y_groups[i]
                record["names"] = names[i]
                record["Comp_Mapping"] = Comp_Mapping
                pdata.append(record)
            return [pdata, MI_dict_biomass]

######### INPUT COMPONENTS AND BIOMASS, SECRETION FLUX MUTUAL INFORMATION CALCULATION #############

        if mi_options == "secretion":
            # Load the file containing the information for each FBA (media) and its corresponding biomass (growth)
            print('secretion flux')
            df4 = pd.read_csv(fba_file[2], header=0, index_col=0)

            df4.index.name = 'FBAs'
            df4 = df4.T

            dfbiomass = pd.read_csv(fba_file[1])
            aa = dfbiomass['Biomass'].values.tolist()
            # print(len(aa))
            df4['Biomass'] = aa
            # print(df4.shape)
            compoundslist_b_u_s = list(df4.columns.values)
            #print(compoundslist_b_u_s)

            MI_dict_b_u_s = {}
            for r in range(0, len(df[0])):
                reaction_states = df[1][r].head(1000)

                def get_groups(flux_df):
                    groups = collections.defaultdict(list)
                    unique = flux_df.aggregate(lambda x: hash(str(x.values)))
                    for k, v in unique[0:].iteritems():
                        groups[v].append(k)
                    return dict([(i, g)
                                 for i, g in enumerate(groups.values())])

                n_group = collections.defaultdict(int)
                groups = get_groups(reaction_states)
                for group in groups.values():
                    n_group[len(group)] += 1
                #print(n_group)
                #print(groups)

                groups_count = {}
                for key, values in groups.items():
                    groups_count[key] = len(values)
                # print(groups_count)

                # Take first FBA label of every group
                group_id = {}
                for k, v in groups.items():
                    group_id.update({k: groups.values()[k][0]})

                # Obtain the Biomass of each Group
                cols_df = group_id.values()
                cols_df4 = df4.columns

                # Dictionary of first FBA label of every group and its corresponding number of members
                groups_label_count = {}
                for k, v in groups_count.items():
                    groups_label_count.update({cols_df[k]: v})

                #print(groups_label_count)

                # Extract FBA Groups biomass inside df4
                df5 = df4.reset_index()
                Groups_Biomass = df5[df5['index'].isin(cols_df)]
                #print(Groups_Biomass)

                # Regroup based on the biomass values
                re_group = Groups_Biomass.groupby(compoundslist_b_u_s)
                #print(re_group)

                my_list = []
                for index, values in re_group:
                    my_list.append(values['index'].values)

                #print(my_list)

                B_U_S_dict = {}
                for media in my_list:
                    if len(media) > 1:
                        media_cond = 0
                        for i in range(len(media)):
                            media_cond += groups_label_count[media[i]]
                        B_U_S_dict.update({str(media)[1:-1]: media_cond})
                        #final_my_dict.update({tuple(media.tolist()):media_cond})
                    else:
                        # single-member group: its count is just that medium's own count
                        B_U_S_dict.update({
                            str(media)[1:-1]: groups_label_count[media[0]]
                        })

                B_U_S_dict = {k: v for k, v in B_U_S_dict.iteritems()}
                #print(B_U_S_dict)

                Summery = pd.DataFrame(B_U_S_dict.items(),
                                       columns=['index_x', 'index_y'])

                Data_4_CondMI = Summery.groupby('index_y').count()
                Data_4_CondMI = Data_4_CondMI.to_dict(orient='dict')

                #print(Data_4_CondMI)
                for k, v in Data_4_CondMI.items():
                    Data_4_CondMI = v

                Num_of_FBAs = Data_4_CondMI.keys()
                Count_Num_of_FBAs = Data_4_CondMI.values()
                #print(Num_of_FBAs)
                #print(Count_Num_of_FBAs)
                #print('-->***<---')

                # ------- Mutual Information (MI) calculation, Stage II (input compounds with respect to biomass, uptake and secretion) -------------
                # Pure Entropy
                FBAs = len(df[1][r].columns)
                pure_entropy = math.log(FBAs, 2)
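                # Same MI formula again; here the conditioning variable is the combined
                # biomass / uptake / secretion profile assembled in df4 above.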

                conditional_entropy = 0.0
                for l in range(0, len(Count_Num_of_FBAs)):
                    temp = -1 * Count_Num_of_FBAs[l] * (
                        Num_of_FBAs[l] / float(FBAs)) * Num_of_FBAs[l] * (
                            1.0 / float(Num_of_FBAs[l]) *
                            (math.log(1.0 / float(Num_of_FBAs[l]), 2)))
                    conditional_entropy += temp

                Mutual_Info_B_U_S = pure_entropy - conditional_entropy
                # print('Mutual Info:', Mutual_Info_B_U_S)

                MI_dict_b_u_s.update({df[0][r]: Mutual_Info_B_U_S})

            # Sorted MI_dict_biomass
            MI_dict_b_u_s = sorted(MI_dict_b_u_s.items(),
                                   key=lambda x: (-len(x[0]), x[0]))
            MI_dict_b_u_s = OrderedDict(MI_dict_b_u_s)

            #print(MI_dict_b_u_s)

            x_groups = [[] for x in range(num_compounds)]
            y_groups = [[] for x in range(num_compounds)]
            names = [[] for x in range(num_compounds)]
            Comp_Mapping = [[] for x in range(num_compounds)]

            for key, val in MI_dict_b_u_s.iteritems():
                del_count = key.count(',')
                x_groups[del_count].append(key)
                y_groups[del_count].append(val)

            # for x, y in zip(x_groups, y_groups):
            # data.append(go.Bar(x=x, y=y, name='test'))

            pdata = []
            for i in range(0, len(x_groups)):
                names[i] = str(i + 1) + ' Compound Combination'
                Comp_Mapping = str(i + 1) + '-' + compoundslist[i]

                record = {}
                record["x"] = []
                for e in x_groups[i]:
                    record["x"].append("c" + e)
                record["y"] = y_groups[i]
                record["names"] = names[i]
                record["Comp_Mapping"] = Comp_Mapping
                pdata.append(record)

            return [pdata, MI_dict_b_u_s]
Example #32
0
class TaxonAPI:
    '''
    Module Name:
    TaxonAPI

    Module Description:
    A KBase module: TaxonAPI
    '''

    ######## WARNING FOR GEVENT USERS ####### noqa
    # Since asynchronous IO can lead to methods - even the same method -
    # interrupting each other, you must be *very* careful when using global
    # state. A method could easily clobber the state set by another while
    # the latter method is running.
    ######################################### noqa
    VERSION = "1.0.2"
    GIT_URL = ""
    GIT_COMMIT_HASH = "5b8cdf63a676a609ea4f180891cf75006640f148"

    #BEGIN_CLASS_HEADER
    _GENOME_TYPES = ['KBaseGenomes.Genome',
                     'KBaseGenomeAnnotations.GenomeAnnotation']
    _TAXON_TYPES = ['KBaseGenomeAnnotations.Taxon']

    @functools32.lru_cache(maxsize=1000)
    def get_object(self, ref):
        res = self.ws.get_objects2({'objects': [{'ref': ref}]})['data'][0]
        return res

    def get_data(self, ref):
        obj = self.get_object(ref)
        return obj['data']

    @functools32.lru_cache(maxsize=1000)
    def translate_to_MD5_types(self, ktype):
        return self.ws.translate_to_MD5_types([ktype]).values()[0]

    def get_referrers(self, ref):
        referrers = self.ws.list_referencing_objects(
            [{"ref": ref}])[0]
        object_refs_by_type = dict()
        tlist = []
        for x in referrers:
            tlist.append(x[2])
        typemap = self.ws.translate_to_MD5_types(tlist)
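        # Each referrer x is a workspace object_info tuple; x[6]/x[0]/x[4] are the
        # workspace id, object id and version, which together form a versioned UPA reference.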
        for x in referrers:
            typestring = typemap[x[2]]
            if typestring not in object_refs_by_type:
                object_refs_by_type[typestring] = list()
            upa = '%d/%d/%d' % (x[6], x[0], x[4])
            object_refs_by_type[typestring].append(upa)
        return object_refs_by_type

    def get_reffers_type(self, ref, types):
        referrers = self.get_referrers(ref)
        children = list()
        for object_type in referrers:
            if object_type.split('-')[0] in types:
                children.extend(referrers[object_type])

        return children

    def make_hash(self, i):
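        # i is a workspace object_info tuple:
        #   i[0]=object id, i[1]=name, i[2]=type string, i[3]=save date, i[4]=version,
        #   i[5]=saved by, i[6]=workspace id, i[7]=workspace name, i[8]=checksum,
        #   i[9]=size, i[10]=metadata (may be an empty dict)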
        omd = i[10]
        if i[10] == {}:
            omd = None

        return {
            'type_string': i[2],
            'workspace_id': i[6],
            'object_checksum': i[8],
            'object_reference': '%d/%d' % (i[6], i[0]),
            'object_size': i[9],
            'saved_by': i[5],
            'object_id': i[0],
            'save_date': i[3],
            'object_metadata': omd,
            'object_name': i[1],
            'version': i[4],
            'workspace_name': i[7],
            'object_reference_versioned': '%d/%d/%d' % (i[6], i[0], i[4])
        }
    #END_CLASS_HEADER

    # config contains contents of config file in a hash or None if it couldn't
    # be found
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self.workspaceURL = config['workspace-url']
        self.ws = Workspace(self.workspaceURL)
        self.shockURL = config['shock-url']
        self.logger = logging.getLogger()
        log_handler = logging.StreamHandler()
        log_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
        self.logger.addHandler(log_handler)

        #END_CONSTRUCTOR
        pass


    def get_parent(self, ctx, ref):
        """
        Retrieve parent Taxon.
        @return Reference to parent Taxon.
        :param ref: instance of type "ObjectReference"
        :returns: instance of type "ObjectReference"
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_parent
        data = self.get_data(ref)
        try:
            returnVal = data['parent_taxon_ref']
            # returnVal=taxon_api.get_parent(ref_only=True)
        except:
            returnVal = ''
        #END get_parent

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method get_parent return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

    def get_children(self, ctx, ref):
        """
        Retrieve children Taxon.
        @return List of references to child Taxons.
        :param ref: instance of type "ObjectReference"
        :returns: instance of list of type "ObjectReference"
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_children
        returnVal = self.get_reffers_type(ref, self._TAXON_TYPES)
        #END get_children

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_children return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_genome_annotations(self, ctx, ref):
        """
        Retrieve the GenomeAnnotation(s) that refer to this Taxon.
         If this is accessing a KBaseGenomes.Genome object, it will
         return an empty list (this information is not available).
         @return List of references to GenomeAnnotation objects.
        :param ref: instance of type "ObjectReference"
        :returns: instance of list of type "ObjectReference"
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_genome_annotations
        returnVal = self.get_reffers_type(ref, self._GENOME_TYPES)
        #END get_genome_annotations

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_genome_annotations return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_scientific_lineage(self, ctx, ref):
        """
        Retrieve the scientific lineage.
        @return Strings for each 'unit' of the lineage, ordered in
          the usual way from Domain to Kingdom to Phylum, etc.
        :param ref: instance of type "ObjectReference"
        :returns: instance of list of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_scientific_lineage
        o = self.ws.get_objects2({'objects': [{'ref': ref}]})['data'][0]['data']
        returnVal = [x.strip() for x in o['scientific_lineage'].split(";")]
        #END get_scientific_lineage

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_scientific_lineage return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_scientific_name(self, ctx, ref):
        """
        Retrieve the scientific name.
        @return The scientific name, e.g., "Escherichia Coli K12 str. MG1655"
        :param ref: instance of type "ObjectReference"
        :returns: instance of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_scientific_name
        obj = self.ws.get_objects2({'objects': [{'ref': ref}]})['data'][0]['data']
        returnVal = obj['scientific_name']
        #END get_scientific_name

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method get_scientific_name return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

    def get_taxonomic_id(self, ctx, ref):
        """
        Retrieve the NCBI taxonomic ID of this Taxon.
        For type KBaseGenomes.Genome, the ``source_id`` will be returned.
        @return Integer taxonomic ID.
        :param ref: instance of type "ObjectReference"
        :returns: instance of Long
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_taxonomic_id
        obj = self.ws.get_objects2({'objects': [{'ref': ref}]})['data'][0]['data']
        returnVal = obj['taxonomy_id']
        #END get_taxonomic_id

        # At some point might do deeper type checking...
        if not isinstance(returnVal, int):
            raise ValueError('Method get_taxonomic_id return value ' +
                             'returnVal is not type int as required.')
        # return the results
        return [returnVal]

    def get_kingdom(self, ctx, ref):
        """
        Retrieve the kingdom.
        :param ref: instance of type "ObjectReference"
        :returns: instance of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_kingdom
        obj = self.ws.get_objects2({'objects': [{'ref': ref}]})['data'][0]['data']
        returnVal = obj['kingdom']
        #END get_kingdom

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method get_kingdom return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

    def get_domain(self, ctx, ref):
        """
        Retrieve the domain.
        :param ref: instance of type "ObjectReference"
        :returns: instance of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_domain
        obj = self.ws.get_objects2({'objects': [{'ref': ref}]})['data'][0]['data']
        returnVal = obj['domain']
        #END get_domain

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method get_domain return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

    def get_genetic_code(self, ctx, ref):
        """
        Retrieve the genetic code.
        :param ref: instance of type "ObjectReference"
        :returns: instance of Long
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_genetic_code
        obj = self.ws.get_objects2({'objects': [{'ref': ref}]})['data'][0]['data']
        returnVal = obj['genetic_code']
        #END get_genetic_code

        # At some point might do deeper type checking...
        if not isinstance(returnVal, int):
            raise ValueError('Method get_genetic_code return value ' +
                             'returnVal is not type int as required.')
        # return the results
        return [returnVal]

    def get_aliases(self, ctx, ref):
        """
        Retrieve the aliases.
        :param ref: instance of type "ObjectReference"
        :returns: instance of list of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_aliases
        obj = self.ws.get_objects2({'objects': [{'ref': ref}]})['data'][0]['data']
        if 'aliases' in obj:
            returnVal = obj['aliases']
        else:
            returnVal = list()
        #END get_aliases

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_aliases return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_info(self, ctx, ref):
        """
        Retrieve object info.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of type "ObjectInfo" (* @skip documentation) ->
           structure: parameter "object_id" of Long, parameter "object_name"
           of String, parameter "object_reference" of String, parameter
           "object_reference_versioned" of String, parameter "type_string" of
           String, parameter "save_date" of String, parameter "version" of
           Long, parameter "saved_by" of String, parameter "workspace_id" of
           Long, parameter "workspace_name" of String, parameter
           "object_checksum" of String, parameter "object_size" of Long,
           parameter "object_metadata" of mapping from String to String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_info
        # returnVal = self.ws.get_objects2({'objects': [{'ref': ref}]})['data'][0]['info']
        i = self.get_object(ref)['info']
        #md5_typestr = self.ws.translate_to_MD5_types([i[2]]).values()[0]
        returnVal = self.make_hash(i)
        #END get_info

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method get_info return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def get_history(self, ctx, ref):
        """
        Retrieve object history.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of type "ObjectHistory" (* @skip documentation) ->
           list of type "ObjectInfo" (* @skip documentation) -> structure:
           parameter "object_id" of Long, parameter "object_name" of String,
           parameter "object_reference" of String, parameter
           "object_reference_versioned" of String, parameter "type_string" of
           String, parameter "save_date" of String, parameter "version" of
           Long, parameter "saved_by" of String, parameter "workspace_id" of
           Long, parameter "workspace_name" of String, parameter
           "object_checksum" of String, parameter "object_size" of Long,
           parameter "object_metadata" of mapping from String to String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_history
        # returnVal = self.ws.get_object_history({'ref': ref})
        returnVal = []
        for i in self.ws.get_object_history({'ref': ref}):
            returnVal.append(self.make_hash(i))
        #END get_history

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_history return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_provenance(self, ctx, ref):
        """
        Retrieve object provenance.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of type "ObjectProvenance" (* @skip documentation)
           -> list of type "ObjectProvenanceAction" (* @skip documentation)
           -> structure: parameter "time" of String, parameter "service_name"
           of String, parameter "service_version" of String, parameter
           "service_method" of String, parameter "method_parameters" of list
           of String, parameter "script_name" of String, parameter
           "script_version" of String, parameter "script_command_line" of
           String, parameter "input_object_references" of list of String,
           parameter "validated_object_references" of list of String,
           parameter "intermediate_input_ids" of list of String, parameter
           "intermediate_output_ids" of list of String, parameter
           "external_data" of list of type "ExternalDataUnit" (* @skip
           documentation) -> structure: parameter "resource_name" of String,
           parameter "resource_url" of String, parameter "resource_version"
           of String, parameter "resource_release_date" of String, parameter
           "data_url" of String, parameter "data_id" of String, parameter
           "description" of String, parameter "description" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_provenance
        prov = self.ws.get_object_provenance([{"ref": ref}])[0]['provenance']
        returnVal = []
        copy_keys = {"time": "time",
                     "service": "service_name",
                     "service_ver": "service_version",
                     "method": "service_method",
                     "method_params": "method_parameters",
                     "script": "script_name",
                     "script_ver": "script_version",
                     "script_command_line": "script_command_line",
                     "input_ws_objects": "input_object_references",
                     "resolved_ws_objects": "validated_object_references",
                     "intermediate_incoming": "intermediate_input_ids",
                     "intermediate_outgoing": "intermediate_output_ids",
                     "external_data": "external_data",
                     "description": "description"
                     }

        for object_provenance in prov:
            action = dict()

            for k in copy_keys:
                if k in object_provenance:
                    if isinstance(object_provenance[k], list) and len(object_provenance[k]) == 0:
                        continue

                    action[copy_keys[k]] = object_provenance[k]

            returnVal.append(action)
        #END get_provenance

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_provenance return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_id(self, ctx, ref):
        """
        Retrieve object identifier.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of Long
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_id
        returnVal = self.get_object(ref)['info'][0]
        #END get_id

        # At some point might do deeper type checking...
        if not isinstance(returnVal, int):
            raise ValueError('Method get_id return value ' +
                             'returnVal is not type int as required.')
        # return the results
        return [returnVal]

    def get_name(self, ctx, ref):
        """
        Retrieve object name.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_name
        returnVal = self.get_object(ref)['info'][1]
        #END get_name

        # At some point might do deeper type checking...
        if not isinstance(returnVal, str):
            raise ValueError('Method get_name return value ' +
                             'returnVal is not type str as required.')
        # return the results
        return [returnVal]

    def get_version(self, ctx, ref):
        """
        Retrieve object version.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_version
        returnVal = str(self.get_object(ref)['info'][4])
        #END get_version

        # At some point might do deeper type checking...
        if not isinstance(returnVal, str):
            raise ValueError('Method get_version return value ' +
                             'returnVal is not type str as required.')
        # return the results
        return [returnVal]

    def get_all_data(self, ctx, params):
        """
        :param params: instance of type "GetAllDataParams" -> structure:
           parameter "ref" of type "ObjectReference", parameter
           "include_decorated_scientific_lineage" of type "boolean" (A
           boolean. 0 = false, other = true.), parameter
           "include_decorated_children" of type "boolean" (A boolean. 0 =
           false, other = true.), parameter "exclude_children" of type
           "boolean" (A boolean. 0 = false, other = true.)
        :returns: instance of type "TaxonData" -> structure: parameter
           "parent" of type "ObjectReference", parameter "children" of list
           of type "ObjectReference", parameter "decorated_children" of list
           of type "TaxonInfo" -> structure: parameter "ref" of type
           "ObjectReference", parameter "scientific_name" of String,
           parameter "scientific_lineage" of list of String, parameter
           "decorated_scientific_lineage" of list of type "TaxonInfo" ->
           structure: parameter "ref" of type "ObjectReference", parameter
           "scientific_name" of String, parameter "scientific_name" of
           String, parameter "taxonomic_id" of Long, parameter "kingdom" of
           String, parameter "domain" of String, parameter "genetic_code" of
           Long, parameter "aliases" of list of String, parameter "obj_info"
           of type "ObjectInfo" (* @skip documentation) -> structure:
           parameter "object_id" of Long, parameter "object_name" of String,
           parameter "object_reference" of String, parameter
           "object_reference_versioned" of String, parameter "type_string" of
           String, parameter "save_date" of String, parameter "version" of
           Long, parameter "saved_by" of String, parameter "workspace_id" of
           Long, parameter "workspace_name" of String, parameter
           "object_checksum" of String, parameter "object_size" of Long,
           parameter "object_metadata" of mapping from String to String
        """
        # ctx is the context object
        # return variables are: d
        #BEGIN get_all_data
        d = {}
        ref = params['ref']

        obj = self.get_object(ref)
        data = obj['data']

        try:
            d['parent'] = data['parent_taxon_ref']
        except KeyError:
            print('Error getting parent for ' + ref)
            # +':\n'+ str(traceback.format_exc()))
            d['parent'] = None

        if 'exclude_children' in params and params['exclude_children'] == 1:
            pass
        else:
            d['children'] = self.get_reffers_type(ref, self._TAXON_TYPES)

        d['scientific_lineage'] = data['scientific_lineage']
        d['scientific_name'] = data['scientific_name']
        d['taxonomic_id'] = data['taxonomy_id']
        try:
            d['kingdom'] = data['kingdom']
            # throws error if not found, so catch and log it
        except KeyError:
            print('Error getting kingdom for ' + ref)
            # +':\n'+ str(traceback.format_exc()))
            d['kingdom'] = None

        d['domain'] = data['domain']
        d['genetic_code'] = data['genetic_code']
        d['aliases'] = None
        if 'aliases' in data:
            d['aliases'] = data['aliases']
        d['info'] = self.make_hash(obj['info'])

        key = 'include_decorated_scientific_lineage'
        if key in params and params[key] == 1:
            l = self.get_decorated_scientific_lineage(ctx, {'ref': ref})[0]
            d['decorated_scientific_lineage'] = l['decorated_scientific_lineage']

        key = 'include_decorated_children'
        if key in params and params[key] == 1:
            l = self.get_decorated_children(ctx, {'ref': ref})[0]
            d['decorated_children'] = l['decorated_children']
        #END get_all_data

        # At some point might do deeper type checking...
        if not isinstance(d, dict):
            raise ValueError('Method get_all_data return value ' +
                             'd is not type dict as required.')
        # return the results
        return [d]

    def get_decorated_scientific_lineage(self, ctx, params):
        """
        :param params: instance of type "GetDecoratedScientificLineageParams"
           -> structure: parameter "ref" of type "ObjectReference"
        :returns: instance of type "DecoratedScientificLineage" (list starts
           at the root, and goes on down to this) -> structure: parameter
           "decorated_scientific_lineage" of list of type "TaxonInfo" ->
           structure: parameter "ref" of type "ObjectReference", parameter
           "scientific_name" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_decorated_scientific_lineage

        lineageList = []
        ref = params['ref']

        while True:
            parent_data = None
            try:
                # note: doesn't look like there is a way to get a reference
                # of a Taxon directly (without constructing it from
                # object_info), so first get reference, then instantiate
                # another API object
                parent_ref = self.get_data(ref)['parent_taxon_ref']
                if parent_ref is not None:
                    data = self.get_data(ref)
                    scientific_name = data['scientific_name']
                    if scientific_name != 'root':
                        parent_data = {
                            'ref': parent_ref,
                            'scientific_name': scientific_name
                        }
                        ref = parent_ref

            except KeyError:
                # case where parent is not found
                pass

            if parent_data is not None:
                lineageList.append(parent_data)
            else:
                break

        lineageList.reverse()  # reverse list to match scientific_lineage style
        returnVal = {'decorated_scientific_lineage': lineageList[:-1]}

        #END get_decorated_scientific_lineage

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method get_decorated_scientific_lineage return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def get_decorated_children(self, ctx, params):
        """
        :param params: instance of type "GetDecoratedChildrenParams" ->
           structure: parameter "ref" of type "ObjectReference"
        :returns: instance of type "DecoratedChildren" -> structure:
           parameter "decorated_children" of list of type "TaxonInfo" ->
           structure: parameter "ref" of type "ObjectReference", parameter
           "scientific_name" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_decorated_children
        ref = params['ref']
        children_refs = self.get_reffers_type(ref, self._TAXON_TYPES)

        decorated_children = []
        for child_ref in children_refs:
            decorated_children.append({
                'ref': child_ref,
                'scientific_name': self.get_data(child_ref)['scientific_name']
            })

        returnVal = {'decorated_children': decorated_children}
        #END get_decorated_children

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method get_decorated_children return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
    def status(self, ctx):
        #BEGIN_STATUS
        returnVal = {'state': "OK", 'message': "", 'version': self.VERSION,
                     'git_url': self.GIT_URL, 'git_commit_hash': self.GIT_COMMIT_HASH}
        #END_STATUS
        return [returnVal]
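
# A minimal, hypothetical sketch of using the taxon getters above through the
# Workspace client seen throughout these examples. The endpoint, token, and taxon
# reference are placeholders, and the client import path depends on the SDK setup,
# so this is a sketch rather than working code.
#
# ws = Workspace('https://kbase.us/services/ws', token='<auth token>')
# taxon = ws.get_objects2({'objects': [{'ref': '12345/6/7'}]})['data'][0]['data']
# print(taxon['scientific_name'], taxon.get('kingdom'), taxon['domain'],
#       taxon['genetic_code'])
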
class FeatureSetBuilder:

    def _mkdir_p(self, path):
        """
        _mkdir_p: make directory for given path
        """
        if not path:
            return
        try:
            os.makedirs(path)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def _validate_upload_featureset_from_diff_expr_params(self, params):
        """
        _validate_upload_featureset_from_diff_expr_params:
                validates params passed to upload_featureset_from_diff_expr method
        """

        log('start validating upload_featureset_from_diff_expr params')

        # check for required parameters
        for p in ['diff_expression_ref', 'workspace_name',
                  'p_cutoff', 'q_cutoff', 'fold_change_cutoff']:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

        p = params.get('fold_scale_type')
        if p and p != 'logarithm':
            raise ValueError('"fold_scale_type" parameter must be set to "logarithm", if used')

    def _generate_report(self, up_feature_set_ref_list, down_feature_set_ref_list,
                         filtered_expression_matrix_ref_list, workspace_name):
        """
        _generate_report: generate summary report
        """

        log('start creating report')

        output_html_files = self._generate_html_report(up_feature_set_ref_list,
                                                       down_feature_set_ref_list)

        objects_created = list()
        for up_feature_set_ref in up_feature_set_ref_list:
            objects_created += [{'ref': up_feature_set_ref,
                                 'description': 'Upper FeatureSet Object'}]
        for down_feature_set_ref in down_feature_set_ref_list:
            objects_created += [{'ref': down_feature_set_ref,
                                 'description': 'Lower FeatureSet Object'}]

        for filtered_expression_matrix_ref in filtered_expression_matrix_ref_list:
            objects_created += [{'ref': filtered_expression_matrix_ref,
                                 'description': 'Filtered ExpressionMatrix Object'}]

        report_params = {'message': '',
                         'workspace_name': workspace_name,
                         'objects_created': objects_created,
                         'html_links': output_html_files,
                         'direct_html_link_index': 0,
                         'html_window_height': 333,
                         'report_object_name': 'kb_FeatureSetUtils_report_' + str(uuid.uuid4())}

        kbase_report_client = KBaseReport(self.callback_url)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'], 'report_ref': output['ref']}

        return report_output

    def _generate_html_report(self, up_feature_set_ref_list, down_feature_set_ref_list):
        """
        _generate_html_report: generate html summary report
        """

        log('start generating html report')
        html_report = list()

        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(output_directory)
        result_file_path = os.path.join(output_directory, 'report.html')

        upper_feature_content = ''
        for up_feature_set_ref in up_feature_set_ref_list:
            feature_set_obj = self.ws.get_objects2({'objects':
                                                    [{'ref':
                                                     up_feature_set_ref}]})['data'][0]
            feature_set_data = feature_set_obj['data']
            feature_set_info = feature_set_obj['info']

            feature_set_name = feature_set_info[1]

            elements = feature_set_data.get('elements')
            feature_ids = elements.keys()

            upper_feature_content += '<tr><td>{}</td><td>{}</td></tr>'.format(feature_set_name,
                                                                              len(feature_ids))

        lower_feature_content = ''
        for down_feature_set_ref in down_feature_set_ref_list:
            feature_set_obj = self.ws.get_objects2({'objects':
                                                    [{'ref':
                                                     down_feature_set_ref}]})['data'][0]
            feature_set_data = feature_set_obj['data']
            feature_set_info = feature_set_obj['info']

            feature_set_name = feature_set_info[1]

            elements = feature_set_data.get('elements')
            feature_ids = elements.keys()

            lower_feature_content += '<tr><td>{}</td><td>{}</td></tr>'.format(feature_set_name,
                                                                              len(feature_ids))

        with open(result_file_path, 'w') as result_file:
            with open(os.path.join(os.path.dirname(__file__), 'report_template.html'),
                      'r') as report_template_file:
                report_template = report_template_file.read()
                report_template = report_template.replace('<tr><td>Upper_FeatureSet</td></tr>',
                                                          upper_feature_content)

                report_template = report_template.replace('<tr><td>Lower_FeatureSet</td></tr>',
                                                          lower_feature_content)

                result_file.write(report_template)

        html_report.append({'path': result_file_path,
                            'name': os.path.basename(result_file_path),
                            'label': os.path.basename(result_file_path),
                            'description': 'HTML summary report'})
        return html_report

    def _process_diff_expression(self, diff_expression_set_ref, result_directory,
                                 condition_label_pair):
        """
        _process_diff_expression: process differential expression object info
        """

        log('start processing differential expression object')

        diff_expr_set_data = self.ws.get_objects2({'objects':
                                                  [{'ref':
                                                   diff_expression_set_ref}]})['data'][0]['data']

        set_items = diff_expr_set_data['items']

        diff_expr_matrix_file_name = 'gene_results.csv'
        diff_expr_matrix_file = os.path.join(result_directory, diff_expr_matrix_file_name)

        with open(diff_expr_matrix_file, 'w') as csvfile:
            fieldnames = ['gene_id', 'log2_fold_change', 'p_value', 'q_value']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

        for set_item in set_items:
            diff_expression_ref = set_item['ref']

            diff_expression_data = self.ws.get_objects2({'objects':
                                                        [{'ref':
                                                         diff_expression_ref}]})['data'][0]['data']

            label_string = set_item['label']
            label_list = [x.strip() for x in label_string.split(',')]
            condition_1 = label_list[0]
            condition_2 = label_list[1]

            if condition_1 in condition_label_pair and condition_2 in condition_label_pair:
                genome_id = diff_expression_data['genome_ref']
                matrix_data = diff_expression_data['data']
                selected_diff_expression_ref = diff_expression_ref

                with open(diff_expr_matrix_file, 'a') as csvfile:
                    row_ids = matrix_data.get('row_ids')
                    row_values = matrix_data.get('values')
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

                    for pos, row_id in enumerate(row_ids):
                        row_value = row_values[pos]
                        writer.writerow({'gene_id': row_id,
                                         'log2_fold_change': row_value[0],
                                         'p_value': row_value[1],
                                         'q_value': row_value[2]})

        return diff_expr_matrix_file, genome_id, selected_diff_expression_ref

    def _generate_feature_set(self, feature_ids, genome_id, workspace_name, feature_set_name):
        """
        _generate_feature_set: generate FeatureSet object

        KBaseCollections.FeatureSet type:
        typedef structure {
            string description;
            list<feature_id> element_ordering;
            mapping<feature_id, list<genome_ref>> elements;
        } FeatureSet;
        """

        log('start saving KBaseCollections.FeatureSet object')

        if isinstance(workspace_name, int) or workspace_name.isdigit():
            workspace_id = workspace_name
        else:
            workspace_id = self.dfu.ws_name_to_id(workspace_name)

        elements = {feature_id: [genome_id] for feature_id in feature_ids}
        feature_set_data = {'description': 'Generated FeatureSet from DifferentialExpression',
                            'element_ordering': feature_ids,
                            'elements': elements}

        object_type = 'KBaseCollections.FeatureSet'
        save_object_params = {
            'id': workspace_id,
            'objects': [{'type': object_type,
                         'data': feature_set_data,
                         'name': feature_set_name}]}

        dfu_oi = self.dfu.save_objects(save_object_params)[0]
        feature_set_obj_ref = str(dfu_oi[6]) + '/' + str(dfu_oi[0]) + '/' + str(dfu_oi[4])

        return feature_set_obj_ref
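
    # For reference, the FeatureSet data built above would look like this for two
    # hypothetical feature ids mapped to one made-up genome reference:
    #
    #     {'description': 'Generated FeatureSet from DifferentialExpression',
    #      'element_ordering': ['gene_1', 'gene_2'],
    #      'elements': {'gene_1': ['123/4/5'], 'gene_2': ['123/4/5']}}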

    def _process_matrix_file(self, diff_expr_matrix_file, comp_p_value, comp_q_value,
                             comp_fold_change_cutoff):
        """
        _process_matrix_file: filter matrix file by given cutoffs
        """

        log('start processing matrix file')

        up_feature_ids = []
        down_feature_ids = []

        if comp_fold_change_cutoff < 0:
            comp_fold_change_cutoff = -comp_fold_change_cutoff

        with open(diff_expr_matrix_file, 'r') as file:
            reader = csv.DictReader(file)

            for row in reader:
                feature_id = row['gene_id']
                row_p_value = row['p_value']
                row_q_value = row['q_value']
                row_fold_change_cutoff = row['log2_fold_change']

                null_value = set(['NA', 'null', ''])
                col_value = set([row_p_value, row_q_value, row_fold_change_cutoff])

                if not col_value.intersection(null_value):
                    p_value_condition = float(row_p_value) <= comp_p_value
                    q_value_condition = float(row_q_value) <= comp_q_value

                    up_matches_condition = (p_value_condition and q_value_condition and
                                                         (float(row_fold_change_cutoff) >=
                                                         comp_fold_change_cutoff))

                    down_matches_condition = (p_value_condition and q_value_condition and
                                             (float(row_fold_change_cutoff) <=
                                             -comp_fold_change_cutoff))

                    if up_matches_condition:
                        up_feature_ids.append(feature_id)
                    elif down_matches_condition:
                        down_feature_ids.append(feature_id)

        return list(set(up_feature_ids)), list(set(down_feature_ids))
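
    # Example of the cutoff logic above (values hypothetical): with comp_p_value=0.05,
    # comp_q_value=0.05 and comp_fold_change_cutoff=1.0, a row
    # ('geneA', log2_fold_change=1.8, p=0.01, q=0.02) lands in up_feature_ids,
    # ('geneB', -2.3, 0.01, 0.03) lands in down_feature_ids, and
    # ('geneC', 0.4, 0.01, 0.02) is dropped because |0.4| < 1.0.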

    def _filter_expression_matrix(self, expression_matrix_ref, feature_ids,
                                  workspace_name, filtered_expression_matrix_suffix,
                                  diff_expression_matrix_ref):
        """
        _filter_expression_matrix: generated filtered expression matrix
        """

        log('start saving KBaseFeatureValues.ExpressionMatrix object')

        if isinstance(workspace_name, int) or workspace_name.isdigit():
            workspace_id = workspace_name
        else:
            workspace_id = self.dfu.ws_name_to_id(workspace_name)

        expression_matrix_obj = self.dfu.get_objects({'object_refs':
                                                     [expression_matrix_ref]})['data'][0]

        expression_matrix_info = expression_matrix_obj['info']
        expression_matrix_data = expression_matrix_obj['data']

        expression_matrix_name = expression_matrix_info[1]

        if re.match('.*_*[Ee]xpression_*[Mm]atrix', expression_matrix_name):
            filtered_expression_matrix_name = re.sub('_*[Ee]xpression_*[Mm]atrix',
                                                     filtered_expression_matrix_suffix,
                                                     expression_matrix_name)
        else:
            filtered_expression_matrix_name = expression_matrix_name + \
                filtered_expression_matrix_suffix

        filtered_expression_matrix_data = expression_matrix_data.copy()

        data = filtered_expression_matrix_data['data']

        row_ids = data['row_ids']
        values = data['values']
        filtered_data = data.copy()

        filtered_row_ids = list()
        filtered_values = list()
        for pos, row_id in enumerate(row_ids):
            if row_id in feature_ids:
                filtered_row_ids.append(row_id)
                filtered_values.append(values[pos])

        filtered_data['row_ids'] = filtered_row_ids
        filtered_data['values'] = filtered_values

        filtered_expression_matrix_data['data'] = filtered_data

        object_type = 'KBaseFeatureValues.ExpressionMatrix'
        save_object_params = {
            'id': workspace_id,
            'objects': [{'type': object_type,
                         'data': filtered_expression_matrix_data,
                         'name': filtered_expression_matrix_name,
                         'extra_provenance_input_refs': [diff_expression_matrix_ref]}]}

        dfu_oi = self.dfu.save_objects(save_object_params)[0]
        filtered_expression_matrix_ref = str(
            dfu_oi[6]) + '/' + str(dfu_oi[0]) + '/' + str(dfu_oi[4])

        return filtered_expression_matrix_ref

    def _xor(self, a, b):
        return bool(a) != bool(b)

    def _check_input_labels(self, condition_pairs, available_condition_labels):
        """
        _check_input_labels: check input condition pairs
        """
        checked = True
        for condition_pair in condition_pairs:

            label_string = condition_pair['label_string'][0].strip()
            label_list = [x.strip() for x in label_string.split(',')]
            first_label = label_list[0]
            second_label = label_list[1]

            if first_label not in available_condition_labels:
                error_msg = 'Condition: {} is not available. '.format(first_label)
                error_msg += 'Available conditions: {}'.format(available_condition_labels)
                raise ValueError(error_msg)

            if second_label not in available_condition_labels:
                error_msg = 'Condition: {} is not available. '.format(second_label)
                error_msg += 'Available conditions: {}'.format(available_condition_labels)
                raise ValueError(error_msg)

            if first_label == second_label:
                raise ValueError('Input conditions are the same')

        return checked

    def _get_condition_labels(self, diff_expression_set_ref):
        """
        _get_condition_labels: get all possible condition label pairs
        """
        log('getting all possible condition pairs')

        condition_label_pairs = list()
        available_condition_labels = set()
        diff_expression_set_obj = self.ws.get_objects2({'objects':
                                                       [{'ref': diff_expression_set_ref}]
                                                        })['data'][0]
        diff_expression_set_data = diff_expression_set_obj['data']
        items = diff_expression_set_data.get('items')
        for item in items:
            label_string = item['label']
            label_list = [x.strip() for x in label_string.split(',')]
            condition_label_pairs.append(label_list)
            available_condition_labels.update(label_list)

        log('all possible condition pairs:\n{}'.format(condition_label_pairs))

        return condition_label_pairs, available_condition_labels
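
    # Example: if the set items carry the (hypothetical) labels 'WT,mutant' and
    # 'WT,stress', the method above returns
    # condition_label_pairs == [['WT', 'mutant'], ['WT', 'stress']] and
    # available_condition_labels == {'WT', 'mutant', 'stress'}.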

    def __init__(self, config):
        self.ws_url = config["workspace-url"]
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.shock_url = config['shock-url']
        self.ws = Workspace(self.ws_url, token=self.token)
        self.dfu = DataFileUtil(self.callback_url)
        self.scratch = config['scratch']

    def upload_featureset_from_diff_expr(self, params):
        """
        upload_featureset_from_diff_expr: create FeatureSet from RNASeqDifferentialExpression
                                          based on given threshold cutoffs

        required params:
        diff_expression_ref: DifferentialExpressionMatrixSet object reference
        expression_matrix_ref: ExpressionMatrix object reference
        p_cutoff: p value cutoff
        q_cutoff: q value cutoff
        fold_scale_type: one of ["linear", "log2+1", "log10+1"]
        fold_change_cutoff: fold change cutoff
        feature_set_suffix: Result FeatureSet object name suffix
        filtered_expression_matrix_suffix: Result ExpressionMatrix object name suffix
        workspace_name: the name of the workspace it gets saved to

        return:
        result_directory: folder path that holds all files generated
        up_feature_set_ref_list: list of generated up-regulated FeatureSet object references
        down_feature_set_ref_list: list of generated down-regulated FeatureSet object references
        filtered_expression_matrix_ref_list: list of generated filtered ExpressionMatrix object refs
        report_name: report name generated by KBaseReport
        report_ref: report reference generated by KBaseReport
        """

        self._validate_upload_featureset_from_diff_expr_params(params)

        diff_expression_set_ref = params.get('diff_expression_ref')
        diff_expression_set_info = self.ws.get_object_info3({"objects":
                                                            [{"ref": diff_expression_set_ref}]}
                                                            )['infos'][0]
        diff_expression_set_name = diff_expression_set_info[1]

        result_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(result_directory)

        (available_condition_label_pairs,
         available_condition_labels) = self._get_condition_labels(diff_expression_set_ref)

        run_all_combinations = params.get('run_all_combinations')
        condition_pairs = params.get('condition_pairs')
        if not self._xor(run_all_combinations, condition_pairs):
            error_msg = "Invalid input:\nselect 'Run All Paired Condition Combinations' "
            error_msg += "or provide partial condition pairs. Don't do both or neither"
            raise ValueError(error_msg)

        if run_all_combinations:
            condition_label_pairs = available_condition_label_pairs
        else:
            if self._check_input_labels(condition_pairs, available_condition_labels):
                condition_label_pairs = list()
                for condition_pair in condition_pairs:
                    label_string = condition_pair['label_string'][0].strip()
                    condition_labels = [x.strip() for x in label_string.split(',')]
                    condition_label_pairs.append(condition_labels)

        up_feature_set_ref_list = list()
        down_feature_set_ref_list = list()
        filtered_expression_matrix_ref_list = list()

        for condition_label_pair in condition_label_pairs:
            condition_string = '_' + '_'.join(condition_label_pair)
            diff_expr_matrix_file, genome_id, diff_expr_matrix_ref = self._process_diff_expression(
                                                                diff_expression_set_ref,
                                                                result_directory,
                                                                condition_label_pair)
            up_feature_ids, down_feature_ids = self._process_matrix_file(
                                                                diff_expr_matrix_file,
                                                                params.get('p_cutoff'),
                                                                params.get('q_cutoff'),
                                                                params.get('fold_change_cutoff'))
            filtered_expression_matrix_suffix = condition_string + \
                params.get('filtered_expression_matrix_suffix')
            if params.get('expression_matrix_ref'):
                filtered_expression_matrix_ref = self._filter_expression_matrix(
                                                params.get('expression_matrix_ref'),
                                                up_feature_ids + down_feature_ids,
                                                params.get('workspace_name'),
                                                filtered_expression_matrix_suffix,
                                                diff_expr_matrix_ref)
                filtered_expression_matrix_ref_list.append(filtered_expression_matrix_ref)

            feature_set_suffix = params.get('feature_set_suffix')
            up_feature_set_name = diff_expression_set_name + \
                condition_string + '_up' + feature_set_suffix
            up_feature_set_ref = self._generate_feature_set(up_feature_ids,
                                                            genome_id,
                                                            params.get('workspace_name'),
                                                            up_feature_set_name)
            up_feature_set_ref_list.append(up_feature_set_ref)

            down_feature_set_name = diff_expression_set_name + \
                condition_string + '_down' + feature_set_suffix
            down_feature_set_ref = self._generate_feature_set(down_feature_ids,
                                                              genome_id,
                                                              params.get('workspace_name'),
                                                              down_feature_set_name)
            down_feature_set_ref_list.append(down_feature_set_ref)

        returnVal = {'result_directory': result_directory,
                     'up_feature_set_ref_list': up_feature_set_ref_list,
                     'down_feature_set_ref_list': down_feature_set_ref_list,
                     'filtered_expression_matrix_ref_list': filtered_expression_matrix_ref_list}

        report_output = self._generate_report(up_feature_set_ref_list, down_feature_set_ref_list,
                                              filtered_expression_matrix_ref_list,
                                              params.get('workspace_name'))
        returnVal.update(report_output)

        return returnVal
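
# A hypothetical driver for the FeatureSetBuilder class above. Every value below is
# a placeholder (URLs, token, scratch path, object references, workspace name), and
# the cutoffs are only illustrative; treat this as a sketch, not deployment code.
config = {
    'workspace-url': 'https://kbase.us/services/ws',      # placeholder endpoint
    'SDK_CALLBACK_URL': 'http://localhost:5000',          # placeholder callback server
    'KB_AUTH_TOKEN': '<auth token>',                      # placeholder token
    'shock-url': 'https://kbase.us/services/shock-api',   # placeholder endpoint
    'scratch': '/kb/module/work/tmp',
}
builder = FeatureSetBuilder(config)
result = builder.upload_featureset_from_diff_expr({
    'diff_expression_ref': '123/4/5',                     # placeholder object reference
    'expression_matrix_ref': '123/6/7',                   # optional; placeholder reference
    'p_cutoff': 0.05,
    'q_cutoff': 0.05,
    'fold_change_cutoff': 1.0,
    'feature_set_suffix': '_feature_set',
    'filtered_expression_matrix_suffix': '_filtered_expression_matrix',
    'run_all_combinations': 1,
    'workspace_name': 'my_workspace',                     # placeholder workspace
})
# result carries up/down FeatureSet refs, filtered matrix refs, and the report info.
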
 def setUpClass(cls):
     token = environ.get('KB_AUTH_TOKEN', None)
     config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('NarrativeService'):
         cls.cfg[nameval[0]] = nameval[1]
     authServiceUrl = cls.cfg.get(
         'auth-service-url',
         "https://kbase.us/services/authorization/Sessions/Login")
     auth_client = _KBaseAuth(authServiceUrl)
     user_id = auth_client.get_user(token)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token':
         token,
         'user_id':
         user_id,
         'provenance': [{
             'service': 'NarrativeService',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated':
         1
     })
     cls.wsURL = cls.cfg['workspace-url']
     cls.serviceWizardURL = cls.cfg['service-wizard']
     cls.wsClient1 = Workspace(cls.wsURL, token=token)
     cls.serviceImpl = NarrativeService(cls.cfg)
     cls.SetAPI_version = cls.cfg['setapi-version']
     cls.DataPalette_version = cls.cfg['datapaletteservice-version']
     cls.intro_text_file = cls.cfg['intro-markdown-file']
     # Second user
     test_cfg_file = '/kb/module/work/test.cfg'
     test_cfg_text = "[test]\n"
     with open(test_cfg_file, "r") as f:
         test_cfg_text += f.read()
     config = ConfigParser()
     config.readfp(StringIO.StringIO(test_cfg_text))
     test_cfg_dict = dict(config.items("test"))
     if 'test_token2' not in test_cfg_dict:
         raise ValueError(
             "Configuration in <module>/test_local/test.cfg file should " +
             "include second user credentials ('test_token2' key)")
     token2 = test_cfg_dict['test_token2']
     user2 = auth_client.get_user(token2)
     print("Test user2: " + user2)
     cls.ctx2 = MethodContext(None)
     cls.ctx2.update({
         'token':
         token2,
         'user_id':
         user2,
         'provenance': [{
             'service': 'NarrativeService',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated':
         1
     })
     cls.wsClient2 = Workspace(cls.wsURL, token=token2)
     cls.wsClients = [cls.wsClient1, cls.wsClient2]
     cls.createdWorkspaces = [[], []]
     # Example objects:
     cls.example_ws_name = cls.createWsStatic(0)
     # Reads
     cls.example_reads_name = "example_reads.1"
     foft = FakeObjectsForTests(os.environ['SDK_CALLBACK_URL'])
     info1 = foft.create_fake_reads({
         'ws_name': cls.example_ws_name,
         'obj_names': [cls.example_reads_name]
     })[0]
     cls.example_reads_ref = str(info1[6]) + '/' + str(
         info1[0]) + '/' + str(info1[4])
     # Genome
     cls.example_genome_name = "example_genome.1"
     foft = FakeObjectsForTests(os.environ['SDK_CALLBACK_URL'])
     info2 = foft.create_fake_genomes({
         'ws_name': cls.example_ws_name,
         'obj_names': [cls.example_genome_name]
     })[0]
     cls.example_genome_ref = str(info2[6]) + '/' + str(
         info2[0]) + '/' + str(info2[4])
     # Other objects
     foft.create_any_objects({
         'ws_name':
         cls.example_ws_name,
         'obj_names': ['any_obj_' + str(i) for i in range(0, 30)]
     })
Example #35
0
 def __init__(self, config):
     self.cfg = config
     self.scratch = config['scratch']
     self.gsu = GenomeSearchUtil(os.environ['SDK_CALLBACK_URL'])
     self.dfu = DataFileUtil(os.environ['SDK_CALLBACK_URL'])
     self.ws = Workspace(config["workspace-url"])
Example #36
0
class FeatureSetDownload:
    def __init__(self, config):
        self.cfg = config
        self.scratch = config['scratch']
        self.gsu = GenomeSearchUtil(os.environ['SDK_CALLBACK_URL'])
        self.dfu = DataFileUtil(os.environ['SDK_CALLBACK_URL'])
        self.ws = Workspace(config["workspace-url"])

    @staticmethod
    def validate_params(params, expected={"workspace_name", "featureset_name"}):
        expected = set(expected)
        pkeys = set(params)
        if expected - pkeys:
            raise ValueError("Required keys {} not in supplied parameters"
                             .format(", ".join(expected - pkeys)))

    def to_tsv(self, params):
        working_dir = os.path.join(self.scratch,
                                   'featureset-download-'+str(uuid.uuid4()))
        os.makedirs(working_dir)
        header = ['Feature Id', 'Aliases', 'Genome', 'Type', 'Function']

        fs_name, fs_dicts = self.make_featureset_dict(params['featureset_ref'])
        files = {'file_path': "{}/{}.tsv".format(working_dir, fs_name)}
        writer = csv.DictWriter(open(files['file_path'], 'w'), header, delimiter='\t',
                                lineterminator='\n')
        writer.writeheader()
        for feat in fs_dicts:
            writer.writerow(feat)
        return fs_name, files

    def make_featureset_dict(self, fs_ref):
        features = []
        ret = self.dfu.get_objects({'object_refs': [fs_ref]})['data'][0]
        feat_set = ret['data']
        fs_name = ret['info'][1]

        feat_by_genome = defaultdict(list)
        for k, v in feat_set['elements'].items():
            feat_by_genome[v[0]].append(k)

        for genome, fids in feat_by_genome.items():
            genome_name = self.ws.get_object_info3({'objects': [{'ref': genome}]})['infos'][0][1]
            res = self.gsu.search({'ref': genome,
                                   'structured_query': {'feature_id': fids},
                                   'sort_by': [['contig_id', 1]],
                                   'start': 0,
                                   'limit': len(fids)
                                   })

            for feat in res['features']:
                features.append({'Feature Id': feat['feature_id'],
                                 'Aliases': ", ".join(feat['aliases'].keys()),
                                 'Genome': "{} ({})".format(genome_name, genome),
                                 'Type': feat['feature_type'],
                                 'Function': feat['function']
                                 })
        return fs_name, features

    def export(self, files, name, params):
        export_package_dir = os.path.join(self.scratch, name+str(uuid.uuid4()))
        os.makedirs(export_package_dir)
        for file in files:
            shutil.move(file, os.path.join(export_package_dir,
                                           os.path.basename(file)))

        # package it up and be done
        package_details = self.dfu.package_for_download({
            'file_path': export_package_dir,
            'ws_refs': [params['featureset_ref']]
        })

        return {'shock_id': package_details['shock_id']}
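
# A hypothetical end-to-end use of the FeatureSetDownload class above. It assumes
# SDK_CALLBACK_URL is set in the environment; the scratch path, workspace URL, and
# the FeatureSet reference '123/8/9' are placeholders.
fs_config = {'scratch': '/kb/module/work/tmp',
             'workspace-url': 'https://kbase.us/services/ws'}
downloader = FeatureSetDownload(fs_config)
fs_name, files = downloader.to_tsv({'featureset_ref': '123/8/9'})
package = downloader.export([files['file_path']], fs_name,
                            {'featureset_ref': '123/8/9'})
# package['shock_id'] identifies the archive produced by DataFileUtil for download.
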
class AveExpressionMatrixBuilder:

    def _validate_calculate_average_expression_matrix_params(self, params):
        """
        _validate_calculate_average_expression_matrix_params:
                validates params passed to calculate_average_expression_matrix method
        """

        log('start validating calculate_average_expression_matrix params')

        # check for required parameters
        for p in ['expression_matrix_ref', 'output_suffix', 'workspace_name']:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

    def _generate_report(self, expression_matrix_ref, workspace_name):
        """
        _generate_report: generate report
        """

        objects_created = [{'ref': expression_matrix_ref,
                            'description': 'Average ExpressionMatrix'}]

        report_params = {'message': '',
                         'workspace_name': workspace_name,
                         'objects_created': objects_created,
                         # 'html_links': output_html_files,
                         # 'direct_html_link_index': 0,
                         'html_window_height': 366,
                         'report_object_name': 'kb_ave_expr_matrix_report_' + str(uuid.uuid4())}

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'], 'report_ref': output['ref']}

        return report_output

    def _save_expression_matrix(self, em_data, em_obj_name, workspace_name):
        """
        _save_expression_matrix: saving ExpressionMatrix
        """

        try:
            log('saving ExpressionMatrix [{}]'.format(em_obj_name))
        
            data_type = 'KBaseFeatureValues.ExpressionMatrix'
            obj_info = self.dfu.save_objects({'id': self.dfu.ws_name_to_id(workspace_name),
                                              'objects': [{'type': data_type,
                                                           'data': em_data,
                                                           'name': em_obj_name}]})[0]
        except Exception as e:
            log(e)
            raise Exception('Failed Saving ExpressionMatrix to Workspace')

        expression_matrix_ref = str(obj_info[6]) + '/' + str(obj_info[0]) + '/' + str(obj_info[4])

        return expression_matrix_ref

    def __init__(self, config):
        self.ws_url = config["workspace-url"]
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.shock_url = config['shock-url']
        self.ws = Workspace(self.ws_url, token=self.token)
        self.dfu = DataFileUtil(self.callback_url)
        self.scratch = config['scratch']

    def calculate_average_expression_matrix(self, params):
        """
        calculate_average_expression_matrix: create an average ExpressionMatrix object 
                                             from an ExpressionMatrix object

        required params:
        expression_matrix_ref: ExpressionMatrix object reference
        output_suffix: output average ExpressionMatrix name suffix
        workspace_name: the name of the workspace it gets saved to
        
        return:
        average_expression_matrix_ref: generated average ExpressionMatrix object reference
        report_name: report name generated by KBaseReport
        report_ref: report reference generated by KBaseReport
        """

        log('--->\nrunning AveExpressionMatrixBuilder.calculate_average_expression_matrix\n' +
            'params:\n{}'.format(json.dumps(params, indent=1)))

        self._validate_calculate_average_expression_matrix_params(params)

        expression_matrix_ref = params.get('expression_matrix_ref')
        expression_matrix = self.ws.get_objects2({'objects':
                                                  [{'ref': 
                                                    expression_matrix_ref}]})['data'][0]

        expression_matrix_data = expression_matrix['data']
        expression_matrix_info = expression_matrix['info']

        condition_map = expression_matrix_data['condition_mapping']

        ori_data = expression_matrix_data['data']
        ori_col_ids = ori_data['col_ids']
        ori_row_ids = ori_data['row_ids']
        ori_values = ori_data['values']

        labels = list(condition_map.keys())

        if set(labels) != set(ori_col_ids):
            error_msg = 'labels in condition_mapping do not match matrix col_ids\n'
            error_msg += 'matrix col_ids: {}\n'.format(ori_col_ids)
            error_msg += 'labels in condition_mapping: {}'.format(labels)
            raise ValueError(error_msg)

        condition_pos = {}

        for label, condition in condition_map.items():
            if condition not in condition_pos:
                condition_pos.update({condition: [ori_col_ids.index(label)]})
            else:
                condition_list = condition_pos[condition]
                condition_list.append(ori_col_ids.index(label))
                condition_pos.update({condition: condition_list})

        conditions = list(condition_pos.keys())

        ave_values = []
        for ori_value in ori_values:
            ave_value = [None] * len(conditions)
            for condition, poss in condition_pos.items():
                ave_pos = conditions.index(condition)
                sum_value = 0.0
                for pos in poss:
                    sum_value += round(float(ori_value[pos]), 3) 
                average = sum_value / len(poss)
                ave_value[ave_pos] = round(average, 2)

            ave_values.append(ave_value)

        average_data = {}
        average_data.update({'row_ids': ori_row_ids})
        average_data.update({'col_ids': conditions})
        average_data.update({'values': ave_values})

        em_data = {}
        genome_ref = expression_matrix_data.get('genome_ref')
        if genome_ref:
            em_data.update({'genome_ref': genome_ref})
        em_data.update({'scale': expression_matrix_data.get('scale')})
        em_data.update({'type': expression_matrix_data.get('type')})
        em_data.update({'feature_mapping': expression_matrix_data.get('feature_mapping')})
        em_data.update({'condition_mapping': expression_matrix_data.get('condition_mapping')})
        em_data.update({'data': average_data})

        expression_matrix_name = expression_matrix_info[1]
        ave_expression_matrix_name = expression_matrix_name + params.get('output_suffix')

        workspace_name = params.get('workspace_name')

        ave_expression_matrix_ref = self._save_expression_matrix(em_data, 
                                                                 ave_expression_matrix_name, 
                                                                 workspace_name)

        returnVal = {'average_expression_matrix_ref': ave_expression_matrix_ref}

        report_output = self._generate_report(ave_expression_matrix_ref,
                                              workspace_name)
        returnVal.update(report_output)

        return returnVal
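
# A self-contained sketch of the per-condition averaging performed above, using tiny
# made-up data; it needs no Workspace access, so it can be run as-is.
col_ids = ['s1', 's2', 's3']
row_ids = ['gene_1', 'gene_2']   # kept only to mirror the matrix layout above
values = [[1.0, 3.0, 5.0],
          [2.0, 2.0, 8.0]]
condition_mapping = {'s1': 'control', 's2': 'control', 's3': 'treated'}

conditions = sorted(set(condition_mapping.values()))
positions = {c: [i for i, col in enumerate(col_ids) if condition_mapping[col] == c]
             for c in conditions}
ave_values = [[round(sum(row[i] for i in positions[c]) / len(positions[c]), 2)
               for c in conditions]
              for row in values]
# conditions == ['control', 'treated']; ave_values == [[2.0, 5.0], [2.0, 8.0]]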