def test_loadGenome(self):
        ''' Load a test Genome object into the test workspace. '''
        
        # Create the test workspace.
        wsClient = Workspace(self._config["workspace_url"], token=self._token)
        try:
            # See if the workspace exists.
            wsInfo = wsClient.get_workspace_info( { "workspace": self._config["test_ws"] } )
        except WorkspaceServerError as e:
            # Hopefully this means the workspace does not exist. (It could also mean someone messed up setting up the URLs)
            traceback.print_exc(file=sys.stderr)
            wsInfo = wsClient.create_workspace( { "workspace": self._config["test_ws"] } )

        # We also need to put in a mapping and a biochemistry object somewhere.
        # To do this, I just create a "dependency workspace" and pull them from there.
        try:
            # See if the workspace exists.
            wsInfo = wsClient.get_workspace_info( { "workspace": self._config["dependency_ws"] } )
        except WorkspaceServerError as e:
            # Hopefully this means the workspace does not exist. (It could also mean someone messed up setting up the URLs)
#            traceback.print_exc(file=sys.stderr)
            depWsInfo = wsClient.create_workspace( { "workspace": self._config["dependency_ws"] } )

        # Load the mapping and biochemistry objects
        testContigSet = json.load(open(self._config['contigset_file'], 'r'))
        contigSetSaveData = dict()
        contigSetSaveData['type'] = 'KBaseGenomes.ContigSet'
        contigSetSaveData['name'] = self._config['contigsetid']
        contigSetSaveData['data'] = testContigSet        
        testGenome = json.load(open(self._config["genome_file"], "r"))
        genomeSaveData = dict()
        genomeSaveData['type'] = 'KBaseGenomes.Genome'
        genomeSaveData['name'] = self._config['genomeid']
        genomeSaveData['data'] = testGenome
        wsClient.save_objects( { 'workspace': self._config['test_ws'], 'objects': [ genomeSaveData, contigSetSaveData ] } )
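
Every example in this listing centers on the same Workspace.save_objects call. A minimal sketch of the shared call shape follows; the URL, workspace name, typespec, and data are placeholders, not values from any example:

from biokbase.workspace.client import Workspace

# Hedged sketch of the common save_objects call shape; all values are placeholders.
ws = Workspace('https://kbase.us/services/ws', token='<auth token>')
ws.save_objects({
    'workspace': 'my_workspace',        # target workspace by name ('id' takes the numeric id instead)
    'objects': [{
        'type': 'KBaseGenomes.Genome',  # full typespec; a version suffix is optional
        'name': 'my_object',            # object name within the workspace
        'data': {},                     # payload that must validate against the typespec
    }]
})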
Example #2
def handler(args):
    ###
    # download ws object and convert it to csv
    wsd = Workspace(url=args.ws_url, token=os.environ.get('KB_AUTH_TOKEN'))
    indata = wsd.get_object({'id' : args.inobj_id,
                  #'type' : 'KBaseExpression.ExpressionSeries', 
                  'workspace' : args.ws_id})['data']

    if indata is None:
        raise Exception("Object {} not found in workspace {}".format(args.inobj_id, args.ws_id))


    ###
    # execute filtering
    flt_cmd_lst = ['mys_example', '-i', "{}-{}".format(os.getpid(), args.exp_fn)]
    if args.method is not None:
        flt_cmd_lst.append('-m')
        flt_cmd_lst.append(args.method)
    if args.p_value is not None:
        flt_cmd_lst.append('-p')
        flt_cmd_lst.append(args.p_value)
    if args.num_genes is not None:
        flt_cmd_lst.append('-n')
        flt_cmd_lst.append(args.num_genes)
    if args.flt_out_fn is not None:
        flt_cmd_lst.append('-o')
        flt_cmd_lst.append("{}-{}".format(os.getpid(), args.flt_out_fn))

    p1 = Popen(flt_cmd_lst, stdout=PIPE)
    out_str = p1.communicate()
    # print output message for error tracking
    if out_str[0] is not None : print out_str[0]
    if out_str[1] is not None : print >> sys.stderr, out_str[1]
    flt_cmd = " ".join(flt_cmd_lst)
   
    ###
    # put it back to workspace
    #fif = open("{}-{}".format(os.getpid(),args.flt_out_fn), 'r')
    #fif.readline(); # skip header
    
    # assume only one genome id
    outdata = {}
    outdata['key'] = indata['key']
    outdata['value'] = "{}{}".format(indata['value'], indata['value'])
    data_list = []
    data_list.append({'type' : 'MyService.PairString', 'data' : outdata, 'name' : args.outobj_id, 'meta' : {'org.series' : args.inobj_id}})
    wsd.save_objects({'workspace' : args.ws_id, 'objects' : data_list})

    if args.del_tmps == "true":
        os.remove("{}-{}".format(os.getpid(), args.exp_fn))
        os.remove("{}-{}".format(os.getpid(), args.flt_out_fn))
Example #3
    def test_handles(self):
        wsName = self.generatePesudoRandomWorkspaceName()
        self.ws.set_permissions({
            'workspace': wsName,
            'new_permission': 'w',
            'users': [self.ctx2['user_id']]
        })
        temp_shock_file = "/kb/module/work/tmp/shock1.txt"
        with open(temp_shock_file, "w") as f1:
            f1.write("Test Shock Handle")
        token1 = self.ctx['token']
        dfu = DataFileUtil(os.environ['SDK_CALLBACK_URL'], token=token1)
        handle1 = dfu.file_to_shock({
            'file_path': temp_shock_file,
            'make_handle': 1
        })['handle']
        hid1 = handle1['hid']
        genome_name = "Genome.1"
        ws2 = Workspace(self.cfg['workspace-url'], token=token1)
        ws2.save_objects({
            'workspace': wsName,
            'objects': [{
                'name': genome_name,
                'type': 'KBaseGenomes.Genome',
                'data': {
                    'id': "qwerty",
                    'scientific_name': "Qwerty",
                    'domain': "Bacteria",
                    'genetic_code': 11,
                    'genbank_handle_ref': hid1
                }
            }]
        })
        genome = self.impl.get_genome_v1(
            self.ctx2, {'genomes': [{'ref': wsName + '/' + genome_name}]}
        )[0]['genomes'][0]['data']
        self.impl.save_one_genome_v1(self.ctx2, {
            'workspace': wsName,
            'name': genome_name,
            'data': genome
        })
        genome = self.impl.get_genome_v1(
            self.ctx2, {'genomes': [{'ref': wsName + '/' + genome_name}]}
        )[0]['genomes'][0]['data']
        self.assertTrue('genbank_handle_ref' in genome)
        hid2 = genome['genbank_handle_ref']
        self.assertNotEqual(hid1, hid2)
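Example #4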
def upload_workspace_data(cs, ws_url, source_ref, target_ws, obj_name):
    ws = Workspace(ws_url, token=TOKEN)
    type_ = ws.translate_from_MD5_types([CS_MD5_TYPE])[CS_MD5_TYPE][0]
    ws.save_objects(
        {'workspace': target_ws,
         'objects': [{'name': obj_name,
                      'type': type_,
                      'data': cs,
                      'provenance': [{'script': SCRIPT_NAME,
                                      'script_ver': __VERSION__,
                                      'input_ws_objects': [source_ref],
                                      }]
                      }
                     ]
         }
    )
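
A hedged usage sketch for upload_workspace_data(); the ContigSet dict, URL, source ref, and names below are placeholders, and TOKEN, SCRIPT_NAME, __VERSION__, and CS_MD5_TYPE are module globals the function assumes:

cs = {'id': 'my_contigs', 'contigs': []}  # placeholder ContigSet payload built elsewhere
upload_workspace_data(
    cs,
    'https://kbase.us/services/ws',  # ws_url
    '123/4/5',                       # source_ref: a wsid/objid/version reference to the source object
    'my_target_workspace',           # target_ws
    'my_contigset')                  # obj_name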
Example #5
def upload_narrative(nar_file, auth_token, user_id, url=ci_ws, set_public=False):
    """
    Uploads a Narrative from a downloaded object file.
    This file needs to be in JSON format, and it expects all
    data and info that is usually returned by the Workspace.get_objects
    method.

    Returns a dict of four elements:
        ws: the id of the workspace that was created
        obj: the id of the narrative object
        refstr: the above two joined into an object ref string (for convenience)
        ref: a NarrativeRef built from the workspace and object ids
    """

    # read the file
    f = open(nar_file, "r")
    nar = json.loads(f.read())
    f.close()

    # do some setup.
    current_nar_metadata = ws_metadata
    current_nar_metadata["narrative_nice_name"] = nar["data"]["metadata"]["name"]
    ws_client = Workspace(url=url, token=auth_token)

    # create the new workspace for the narrative
    ws_info = ws_client.create_workspace(
        {
            "workspace": "{}:{}".format(user_id, str(time.time()).replace(".", "")),
            "meta": current_nar_metadata,
            "globalread": "r" if set_public else "n",
        }
    )
    ws_id = ws_info[0]

    # setup and save the narrative object
    nar["info"][10]
    ws_save_obj = {
        "type": "KBaseNarrative.Narrative",
        "data": nar["data"],
        "name": nar["info"][1],
        "meta": nar["info"][10],
        "provenance": [
            {
                "script": "upload_narrative_test.py",
                "description": "Temporary Narrative uploaded for automated testing",
            }
        ],
    }
    obj_info = ws_client.save_objects({"id": ws_id, "objects": [ws_save_obj]})

    # tweak the workspace's metadata to properly present its narrative
    ws_client.alter_workspace_metadata(
        {"wsi": {"id": ws_id}, "new": {"narrative": obj_info[0][0]}}
    )
    return {
        "ws": ws_info[0],
        "obj": obj_info[0][0],
        "refstr": "{}/{}".format(ws_info[0], obj_info[0][0]),
        "ref": NarrativeRef({"wsid": ws_info[0], "objid": obj_info[0][0]}),
    }
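
A hedged usage sketch for upload_narrative(); the file path, token, and user id are placeholders, and ci_ws and ws_metadata are module globals the function assumes:

result = upload_narrative("narrative.json", "<auth token>", "some_user", set_public=False)
print("Narrative saved as {}".format(result["refstr"]))  # e.g. "<ws_id>/<obj_id>"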
Example #6
    def create(self, ctx, params):
        # ctx is the context object
        # return variables are: info
        #BEGIN create

        print('Creating KBase Report.')

        # check that the basic parameters are set
        if 'report' not in params:
            raise ValueError('Field "report" must be defined to save a report')
        if 'workspace_name' not in params:
            raise ValueError(
                'Field "workspace_name" must be defined to save a report')

        # setup proper provenance for the report
        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']

        # generate a random report name
        reportName = 'report_' + str(uuid.uuid4())
        if 'prefix' in params:
            reportName = params['prefix'] + reportName

        print('Report Name: ' + reportName)

        # let any workspace errors just percolate up for now
        ws = Workspace(self.workspaceURL, token=ctx['token'])
        report_info = ws.save_objects({
            'workspace':
            params['workspace_name'],
            'objects': [{
                'type': 'KBaseReport.Report',
                'data': params['report'],
                'name': reportName,
                'meta': {},
                'hidden': 1,
                'provenance': provenance
            }]
        })[0]

        info = {
            'ref':
            str(report_info[6]) + '/' + str(report_info[0]) + '/' +
            str(report_info[4]),
            'name':
            report_info[1]
        }

        #END create

        # At some point might do deeper type checking...
        if not isinstance(info, dict):
            raise ValueError('Method create return value ' +
                             'info is not type dict as required.')
        # return the results
        return [info]
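
The report_info[6], [0], and [4] indices above address fields of the standard workspace object_info tuple (objid, name, type, save date, version, saved_by, wsid, workspace, checksum, size, metadata). A small helper that names this, equivalent to the 'ref' built in create():

def obj_ref(info):
    # info is a workspace object_info tuple:
    # (objid, name, type, save_date, version, saved_by, wsid, workspace, chsum, size, meta)
    return '{0}/{1}/{2}'.format(info[6], info[0], info[4])  # wsid/objid/version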
Example #7
    def create(self, ctx, params):
        # ctx is the context object
        # return variables are: info
        #BEGIN create

        print('Creating KBase Report.')

        # check that the basic parameters are set
        if 'report' not in params:
            raise ValueError('Field "report" must be defined to save a report')
        if 'workspace_name' not in params:
            raise ValueError('Field "workspace_name" must be defined to save a report')

        # setup proper provenance for the report
        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']

        # generate a random report name
        reportName = 'report_'+str(uuid.uuid4())
        if 'prefix' in params:
            reportName = params['prefix'] + reportName


        print('Report Name: ' + reportName)

        # let any workspace errors just percolate up for now
        ws = Workspace(self.workspaceURL, token=ctx['token'])
        report_info = ws.save_objects({
                'workspace':params['workspace_name'],
                'objects':[
                    {
                        'type':'KBaseReport.Report',
                        'data':params['report'],
                        'name':reportName,
                        'meta':{},
                        'hidden':1,
                        'provenance':provenance
                    }
                ]
            })[0]

        info = {
            'ref'  : str(report_info[6]) + '/' + str(report_info[0]) + '/' + str(report_info[4]),
            'name' : report_info[1]
        }

        #END create

        # At some point might do deeper type checking...
        if not isinstance(info, dict):
            raise ValueError('Method create return value ' +
                             'info is not type dict as required.')
        # return the results
        return [info]
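Example #8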
    def test_loadGenome(self):
        ''' Load a test Genome object into the test workspace. '''

        # Create the test workspace.
        wsClient = Workspace(self._config["workspace_url"], token=self._token)
        try:
            # See if the workspace exists.
            wsInfo = wsClient.get_workspace_info(
                {"workspace": self._config["test_ws"]})
        except WorkspaceServerError as e:
            # Hopefully this means the workspace does not exist. (It could also mean someone messed up setting up the URLs)
            traceback.print_exc(file=sys.stderr)
            wsInfo = wsClient.create_workspace(
                {"workspace": self._config["test_ws"]})

        # We also need to put in a mapping and a biochemistry object somewhere.
        # To do this, I just create a "dependency workspace" and pull them from there.
        try:
            # See if the workspace exists.
            wsInfo = wsClient.get_workspace_info(
                {"workspace": self._config["dependency_ws"]})
        except WorkspaceServerError as e:
            # Hopefully this means the workspace does not exist. (It could also mean someone messed up setting up the URLs)
            #            traceback.print_exc(file=sys.stderr)
            depWsInfo = wsClient.create_workspace(
                {"workspace": self._config["dependency_ws"]})

        # Load the mapping and biochemistry objects
        testContigSet = json.load(open(self._config['contigset_file'], 'r'))
        contigSetSaveData = dict()
        contigSetSaveData['type'] = 'KBaseGenomes.ContigSet'
        contigSetSaveData['name'] = self._config['contigsetid']
        contigSetSaveData['data'] = testContigSet
        testGenome = json.load(open(self._config["genome_file"], "r"))
        genomeSaveData = dict()
        genomeSaveData['type'] = 'KBaseGenomes.Genome'
        genomeSaveData['name'] = self._config['genomeid']
        genomeSaveData['data'] = testGenome
        wsClient.save_objects({
            'workspace': self._config['test_ws'],
            'objects': [genomeSaveData, contigSetSaveData]
        })
Example #9
def upload_workspace_data(cs, ws_url, source_ref, target_ws, obj_name):
    ws = Workspace(ws_url, token=TOKEN)
    type_ = ws.translate_from_MD5_types([CS_MD5_TYPE])[CS_MD5_TYPE][0]
    ws.save_objects({
        'workspace':
        target_ws,
        'objects': [{
            'name':
            obj_name,
            'type':
            type_,
            'data':
            cs,
            'provenance': [{
                'script': SCRIPT_NAME,
                'script_ver': __VERSION__,
                'input_ws_objects': [source_ref],
            }]
        }]
    })
Example #10
def upload_narrative(nar_file, auth_token, user_id, url=ci_ws, set_public=False):
    """
    Uploads a Narrative from a downloaded object file.
    This file needs to be in JSON format, and it expects all
    data and info that is usually returned by the Workspace.get_objects
    method.

    Returns a dict of four elements:
        ws: the id of the workspace that was created
        obj: the id of the narrative object
        refstr: the above two joined into an object ref string (for convenience)
        ref: a NarrativeRef built from the workspace and object ids
    """

    # read the file
    f = open(nar_file, 'r')
    nar = json.loads(f.read())
    f.close()

    # do some setup.
    current_nar_metadata = ws_metadata
    current_nar_metadata['narrative_nice_name'] = nar['data']['metadata']['name']
    ws_client = Workspace(url=url, token=auth_token)

    # create the new workspace for the narrative
    ws_info = ws_client.create_workspace({
        'workspace': '{}:{}'.format(user_id, str(time.time()).replace('.', '')),
        'meta': current_nar_metadata,
        'globalread': 'r' if set_public else 'n'
    })
    ws_id = ws_info[0]

    # setup and save the narrative object
    metadata = nar['info'][10]
    ws_save_obj = {
        'type': 'KBaseNarrative.Narrative',
        'data': nar['data'],
        'name': nar['info'][1],
        'meta': metadata,
        'provenance': [{
            'script': 'upload_narrative_test.py',
            'description': 'Temporary Narrative uploaded for automated testing'
        }]
    }
    obj_info = ws_client.save_objects({'id': ws_id,
                                       'objects': [ws_save_obj]})

    # tweak the workspace's metadata to properly present its narrative
    ws_client.alter_workspace_metadata({'wsi': {'id': ws_id}, 'new': {'narrative': obj_info[0][0]}})
    return {
        'ws': ws_info[0],
        'obj': obj_info[0][0],
        'refstr': '{}/{}'.format(ws_info[0], obj_info[0][0]),
        'ref': NarrativeRef({'wsid': ws_info[0], 'objid': obj_info[0][0]})
    }
Example #11
    def manyHellos_runEach(self, ctx, task):
        """
        :param task: instance of type "ManyHellos_task" -> structure:
           parameter "msg" of String, parameter "job_number" of Long,
           parameter "workspace" of String
        :returns: instance of type "ManyHellos_runEachResult" (runEach()) ->
           structure: parameter "message" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN manyHellos_runEach
        print("this is manyHellos_runEach...")
        pprint(["task is ", task])

        res = "{0}: {1}".format(task['job_number'], task['msg'])

        ws_client = Workspace(url=self.config['workspace-url'],
                              token=ctx['token'])
        res_obj = ws_client.save_objects({
            "workspace":
            task['workspace'],
            "objects": [{
                'type':
                'KBaseReport.Report',
                "data": {
                    'objects_created': [],
                    'text_message': res
                },
                "name":
                "{0}_{1}.rpt".format(task['msg'], task['job_number']),
                "meta": {}
            }]
        })
        res = json.dumps(res_obj)

        print("exiting manyHellos_runEach(), res is", res)
        returnVal = {'message': res}
        #END manyHellos_runEach

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method manyHellos_runEach return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
Example #12
def save_ws_object(obj):
    """Save an object to the workspace

    Parameters
    ----------
    obj : dict
        Object with the fields: type, data, and name.
        The type must be the full typespec
        (e.g. 'MetaboliteAtlas.Compound-0.3')

    Returns
    -------
    id : str
        Object workspace id
    """
    from biokbase.workspace.client import Workspace
    ws = Workspace(WS_URL)
    obj.setdefault('hidden', 0)
    wks = ws.list_workspaces({'excludeGlobal': 1})
    ws_id = [wk[-1] for wk in wks if wk[0] == os.environ['KB_WORKSPACE_ID']][0]
    save_objects_params = {'id': ws_id, 'objects': [obj]}
    return ws.save_objects(save_objects_params)[0][-2]
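
A hedged usage sketch for save_ws_object(); the typespec and payload are placeholders, and WS_URL plus the KB_WORKSPACE_ID environment variable must already be set:

obj = {
    'type': 'MetaboliteAtlas.Compound-0.3',  # full typespec, as the docstring requires
    'name': 'my_compound',
    'data': {'name': 'glucose'},             # placeholder payload
}
obj_id = save_ws_object(obj)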
Example #13
def save_ws_object(obj):
    """Save an object to the workspace

    Parameters
    ----------
    obj : dict
        Object with the fields: type, data, and name.
        The type must be the full typespec
        (e.g. 'MetaboliteAtlas.Compound-0.3')

    Returns
    -------
    id : str
        Object workspace id
    """
    from biokbase.workspace.client import Workspace

    ws = Workspace(WS_URL)
    obj.setdefault("hidden", 0)
    wks = ws.list_workspaces({"excludeGlobal": 1})
    ws_id = [wk[-1] for wk in wks if wk[0] == os.environ["KB_WORKSPACE_ID"]][0]
    save_objects_params = {"id": ws_id, "objects": [obj]}
    return ws.save_objects(save_objects_params)[0][-2]
Example #14
    def SetupRNASeqAnalysis(self, ctx, params):
        # ctx is the context object
        # return variables are: returnVal
        # BEGIN SetupRNASeqAnalysis
        user_token = ctx["token"]
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        out_obj = {k: v for k, v in params.iteritems() if k not in ("ws_id", "genome_id", "annotation_id") and v}
        pprint(out_obj)
        if "num_samples" in out_obj:
            out_obj["num_samples"] = int(out_obj["num_samples"])
        if "num_replicates" in out_obj:
            out_obj["num_replicates"] = int(out_obj["num_replicates"])
        if "genome_id" in params and params["genome_id"] is not None:
            out_obj["genome_id"] = script_util.get_obj_info(
                self.__LOGGER, self.__WS_URL, [params["genome_id"]], params["ws_id"], user_token
            )[0]
        if "annotation_id" in params and params["annotation_id"] is not None:
            g_ref = script_util.get_obj_info(
                self.__LOGGER, self.__WS_URL, [params["annotation_id"]], params["ws_id"], user_token
            )[0]
            out_obj["annotation_id"] = g_ref
        self.__LOGGER.info("Uploading RNASeq Analysis object to workspace {0}".format(out_obj["experiment_id"]))
        try:
            res = ws_client.save_objects(
                {
                    "workspace": params["ws_id"],
                    "objects": [
                        {"type": "KBaseRNASeq.RNASeqAnalysis", "data": out_obj, "name": out_obj["experiment_id"]}
                    ],
                }
            )
            returnVal = {"workspace": params["ws_id"], "output": out_obj["experiment_id"]}

        except Exception, e:
            raise KBaseRNASeqException(
                "Error Saving the object to workspace {0},{1}".format(out_obj["experiment_id"], e)
            )
Example #15
    def diff_p_distribution(self, ctx, args):
        # ctx is the context object
        # return variables are: result
        #BEGIN diff_p_distribution
        try:
            os.makedirs(self.RAWEXPR_DIR)
        except:
            pass
        try:
            os.makedirs(self.FLTRD_DIR)
        except:
            pass
        try:
            os.makedirs(self.FINAL_DIR)
        except:
            pass
 
        if self.logger is None:
            self.logger = script_utils.stderrlogger(__file__)
        
        result = {}
        self.logger.info("Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV")
        token = ctx['token']
 
        eenv = os.environ.copy()
        eenv['KB_AUTH_TOKEN'] = token

        param = args
 
 
        from biokbase.workspace.client import Workspace
        ws = Workspace(url=self.__WS_URL, token=token)
        expr = ws.get_objects([{'workspace': param['workspace_name'], 'name' : param['object_name']}])[0]['data']
 
 
        cmd_dowload_cvt_tsv = [self.FVE_2_TSV, '--workspace_service_url', self.__WS_URL, 
                                          '--workspace_name', param['workspace_name'],
                                          '--object_name', param['object_name'],
                                          '--working_directory', self.RAWEXPR_DIR,
                                          '--output_file_name', self.EXPRESS_FN
                              ]
 
        # need shell in this case because the java code is depending on finding the KBase token in the environment
        #  -- copied from FVE_2_TSV
        tool_process = subprocess.Popen(" ".join(cmd_dowload_cvt_tsv), stderr=subprocess.PIPE, shell=True, env=eenv)
        stdout, stderr = tool_process.communicate()
        
        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)
 
        if stderr is not None and len(stderr) > 0:
            self.logger.info(stderr)
 
        self.logger.info("Identifying differentially expressed genes")
 
        ## Prepare sample file
        # detect num of columns
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN), 'r') as f:
          fl = f.readline()
        ncol = len(fl.split('\t'))
        
        # force ANOVA if the number of samples is two
        if ncol == 3:
            param['method'] = 'anova'
 
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN), 'wt') as s:
          s.write("0")
          for j in range(1,ncol-1):
            s.write("\t{0}".format(j))
          s.write("\n")
 
 
        ## Run coex_filter
        cmd_coex_filter = [self.COEX_FILTER, '-i', "{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN), '-o', "{0}/{1}".format(self.FLTRD_DIR, self.FLTRD_FN),
                           '-m', param['method'], '-n', '10', '-s', "{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN),
                           '-x', "{0}/{1}".format(self.RAWEXPR_DIR, self.GENELST_FN), '-t', 'y', '-j', self.PVFDT_FN]
        if 'num_features' in param:
          cmd_coex_filter.append("-n")
          cmd_coex_filter.append(str(param['num_features']))
 
        if 'p_value' in param:
          cmd_coex_filter.append("-p")
          cmd_coex_filter.append(str(param['p_value']))
 
 
        tool_process = subprocess.Popen(cmd_coex_filter, stderr=subprocess.PIPE)
        stdout, stderr = tool_process.communicate()
        
        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)
 
        if stderr is not None and len(stderr) > 0:
            self.logger.info(stderr)
 
        ## loading pvalue distribution FDT
        with open(self.PVFDT_FN, 'r') as myfile:
            pvfdt = json.load(myfile)
        data_obj_name = "{0}.fdt".format(param['out_figure_object_name'])
        pvfdt['id'] = data_obj_name
 
 
        fig_properties = {"xlabel" : "-log2(p-value)", "ylabel" : "Number of features", "xlog_mode" : "-log2", "ylog_mode" : "none", "title" : "Histogram of P-values", "plot_type" : "histogram"}
        sstatus = ws.save_objects({'workspace' : param['workspace_name'], 'objects' : [{'type' : 'MAK.FloatDataTable',
                                                                              'data' : pvfdt,
                                                                              'name' : data_obj_name}]})

        data_ref = "{0}/{1}/{2}".format(sstatus[0][6], sstatus[0][0], sstatus[0][4])
        fig_properties['data_ref'] = data_ref

        sstatus = ws.save_objects({'workspace' : param['workspace_name'], 'objects' : [{'type' : 'CoExpression.FigureProperties',
                                                                              'data' : fig_properties,
                                                                              'name' : (param['out_figure_object_name'])}]})
        result = fig_properties
        #END diff_p_distribution

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method diff_p_distribution return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
Example #16
    def const_coex_net_clust(self, ctx, args):
        # ctx is the context object
        # return variables are: result
        #BEGIN const_coex_net_clust
        try:
            os.makedirs(self.RAWEXPR_DIR)
        except:
            pass
        try:
            os.makedirs(self.CLSTR_DIR)
        except:
            pass
        try:
            os.makedirs(self.FINAL_DIR)
        except:
            pass
 
        if self.logger is None:
            self.logger = script_utils.stderrlogger(__file__)
        
        result = {}
        self.logger.info("Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV")
        token = ctx['token']

        param = args
 
        from biokbase.workspace.client import Workspace
        ws = Workspace(url=self.__WS_URL, token=token)
        expr = ws.get_objects([{'workspace': param['workspace_name'], 'name' : param['object_name']}])[0]['data']
 
 
        eenv = os.environ.copy()
        eenv['KB_AUTH_TOKEN'] = token
        cmd_dowload_cvt_tsv = [self.FVE_2_TSV, '--workspace_service_url', self.__WS_URL, 
                                          '--workspace_name', param['workspace_name'],
                                          '--object_name', param['object_name'],
                                          '--working_directory', self.RAWEXPR_DIR,
                                          '--output_file_name', self.EXPRESS_FN
                              ]
 
        # need shell in this case because the java code is depending on finding the KBase token in the environment
        #  -- copied from FVE_2_TSV
        tool_process = subprocess.Popen(" ".join(cmd_dowload_cvt_tsv), stderr=subprocess.PIPE, shell=True, env=eenv)
        stdout, stderr = tool_process.communicate()
        
        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)
 
        if stderr is not None and len(stderr) > 0:
            self.logger.info(stderr)
            #raise Exception(stderr)
 
        self.logger.info("Coexpression clustering analysis")
 
        ## Prepare sample file
        # detect num of columns
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN), 'r') as f:
          fl = f.readline()
        ncol = len(fl.split('\t'))
        
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN), 'wt') as s:
          s.write("0")
          for j in range(1,ncol-1):
            s.write("\t{0}".format(j))
          s.write("\n")
 
 
        ## Run coex_cluster
        cmd_coex_cluster = [self.COEX_CLUSTER, '-t', 'y',
                           '-i', "{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN), 
                           '-o', "{0}/{1}".format(self.CLSTR_DIR, self.CLSTR_FN), '-m', "{0}/{1}".format(self.CLSTR_DIR, self.CSTAT_FN) ]
 
        for p in ['net_method', 'minRsq', 'maxmediank', 'maxpower', 'clust_method', 'minModuleSize', 'detectCutHeight']:
           if p in param:
             cmd_coex_cluster.append("--{0}".format(p))
             cmd_coex_cluster.append(str(param[p]))
  
 
        #sys.exit(2) #TODO: No error handling in narrative so we do graceful termination
 
        #if 'p_value' in param and 'num_features' in param:
        #  self.logger.error("Both of p_value and num_features cannot be defined together");
        #  sys.exit(3)
 
        tool_process = subprocess.Popen(cmd_coex_cluster, stderr=subprocess.PIPE)
        stdout, stderr = tool_process.communicate()
        
        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)
 
        if stderr is not None and len(stderr) > 0:
            if re.search(r'^There were \d+ warnings \(use warnings\(\) to see them\)', stderr):
              self.logger.info(stderr)
            else:
              self.logger.error(stderr)
              raise Exception(stderr)
 
        
        # build index for gene list
        pos_index ={expr['data']['row_ids'][i]: i for i in range(0, len(expr['data']['row_ids']))}
 
 
        # parse clustering results
        cid2genelist = {}
        cid2stat = {}
        with open("{0}/{1}".format(self.CLSTR_DIR, self.CSTAT_FN),'r') as glh:
            glh.readline() # skip header
            for line in glh:
                cluster, mcor, msec = line.rstrip().replace('"','').split("\t")
                cid2stat[cluster]= [mcor, msec]
        with open("{0}/{1}".format(self.CLSTR_DIR, self.CLSTR_FN),'r') as glh:
            glh.readline() # skip header
            for line in glh:
                gene, cluster = line.rstrip().replace('"','').split("\t")
                if cluster not in cid2genelist:
                    cid2genelist[cluster] = []
                cid2genelist[cluster].append(gene)
 
        if(len(cid2genelist) < 1) :
          self.logger.error("Clustering failed")
          return empty_results("Error: No cluster output", expr,self.__WS_URL, param, self.logger, ws)
          #sys.exit(4)
 
        self.logger.info("Uploading the results onto WS")
        feature_clusters = []
        for cluster in cid2genelist:
            feature_clusters.append({"meancor": float(cid2stat[cluster][0]), "msec": float(cid2stat[cluster][1]), "id_to_pos": {gene: pos_index[gene] for gene in cid2genelist[cluster]}})

        ## Upload Clusters
        feature_clusters ={"original_data": "{0}/{1}".format(param['workspace_name'],param['object_name']),
                           "feature_clusters": feature_clusters}
 
        ws.save_objects({'workspace' : param['workspace_name'], 'objects' : [{'type' : 'KBaseFeatureValues.FeatureClusters',
                                                                          'data' : feature_clusters,
                                                                          'name' : (param['out_object_name'])}]})
        result = {'workspace_name' : param['workspace_name'], 'out_object_name' : param['out_object_name']}
        #END const_coex_net_clust

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method const_coex_net_clust return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
Example #17
def run_filter_genes(workspace_service_url=None, param_file = None, level=logging.INFO, logger = None):
    """
    Narrative Job Wrapper script to execute coex_filter
    
    Args:
        workspace_service_url:  A url for the KBase Workspace service 
        param_file: parameter file
        object_name: Name of the object in the workspace 
        level: Logging level, defaults to logging.INFO.
    
    Returns:
        Output is written back in WS
    
    Authors:
        Shinjae Yoo
    
    """ 

    try:
        os.makedirs(RAWEXPR_DIR)
    except:
        pass
    try:
        os.makedirs(FLTRD_DIR)
    except:
        pass
    try:
        os.makedirs(FINAL_DIR)
    except:
        pass

    if logger is None:
        logger = script_utils.stderrlogger(__file__)
    
    logger.info("Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV")
    token = os.environ.get("KB_AUTH_TOKEN")

    with open(param_file) as paramh:
      param = json.load(paramh)

    cmd_dowload_cvt_tsv = [FVE_2_TSV, '--workspace_service_url', workspace_service_url, 
                                      '--workspace_name', param['workspace_name'],
                                      '--object_name', param['object_name'],
                                      '--working_directory', RAWEXPR_DIR,
                                      '--output_file_name', EXPRESS_FN
                          ]

    # need shell in this case because the java code is depending on finding the KBase token in the environment
    #  -- copied from FVE_2_TSV
    tool_process = subprocess.Popen(" ".join(cmd_dowload_cvt_tsv), stderr=subprocess.PIPE, shell=True)
    stdout, stderr = tool_process.communicate()
    
    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        logger.info(stderr)

    logger.info("Identifying differentially expressed genes")

    ## Prepare sample file
    # detect num of columns
    with open("{0}/{1}".format(RAWEXPR_DIR, EXPRESS_FN), 'r') as f:
      fl = f.readline()
    ncol = len(fl.split('\t'))
    
    with open("{0}/{1}".format(RAWEXPR_DIR, SAMPLE_FN), 'wt') as s:
      s.write("0")
      for j in range(1,ncol-1):
        s.write("\t{0}".format(j))
      s.write("\n")


    ## Run coex_filter
    cmd_coex_filter = [COEX_FILTER, '-i', "{0}/{1}".format(RAWEXPR_DIR, EXPRESS_FN), '-o', "{0}/{1}".format(FLTRD_DIR, FLTRD_FN),
                       '-m', param['method'], '-s', "{0}/{1}".format(RAWEXPR_DIR, SAMPLE_FN),
                       '-x', "{0}/{1}".format(RAWEXPR_DIR, GENELST_FN), '-t', 'y']
    if 'num_features' in param:
      cmd_coex_filter.append("-n")
      cmd_coex_filter.append(str(param['num_features']))

    if 'num_features' not in param and 'p_value' in param:
      cmd_coex_filter.append("-p")
      cmd_coex_filter.append(str(param['p_value']))

    if 'p_value' not in param and 'num_features' not in param:
      logger.error("One of p_value or num_features must be defined");
      sys.exit(2)

    #if 'p_value' in param and 'num_features' in param:
    #  logger.error("Both of p_value and num_features cannot be defined together");
    #  sys.exit(3)

    tool_process = subprocess.Popen(cmd_coex_filter, stderr=subprocess.PIPE)
    stdout, stderr = tool_process.communicate()
    
    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        logger.info(stderr)

    ## Header correction
    with open("{0}/{1}".format(FLTRD_DIR, FLTRD_FN), 'r') as ff:
        fe = ff.readlines()
    with open("{0}/{1}".format(FLTRD_DIR, FLTRD_FN), 'w') as ff:
        ff.write(fl) # use original first line that has correct header information
        fe.pop(0)
        ff.writelines(fe)
    

    ## Upload FVE
    from biokbase.workspace.client import Workspace
    ws = Workspace(url=workspace_service_url, token=os.environ['KB_AUTH_TOKEN'])
    expr = ws.get_objects([{'workspace': param['workspace_name'], 'name' : param['object_name']}])[0]['data']
    
    # change workspace to be the referenced object's workspace_name because it may not be in the same working ws due to referencing
    cmd_upload_expr = [TSV_2_FVE, '--workspace_service_url', workspace_service_url, 
                                      '--object_name', param['out_expr_object_name'],
                                      '--working_directory', FINAL_DIR,
                                      '--input_directory', FLTRD_DIR,
                                      '--output_file_name', FINAL_FN
                          ]
    tmp_ws = param['workspace_name']
    if 'genome_ref' in expr:
        cmd_upload_expr.append('--genome_object_name')
        obj_infos = ws.get_object_info_new({"objects": [{'ref':expr['genome_ref']}]})[0]

        if len(obj_infos) < 1:
            logger.error("Couldn't find {0} from the workspace".format(expr['genome_ref']))
            raise Exception("Couldn't find {0} from the workspace".format(expr['genome_ref']))

        cmd_upload_expr.append(obj_infos[1])
        tmp_ws = obj_infos[7]
        logger.info("{0} => {1} / {2}".format(expr['genome_ref'], tmp_ws, obj_infos[1]))

    # updated ws name
    cmd_upload_expr.append('--workspace_name')
    cmd_upload_expr.append(tmp_ws)

    tool_process = subprocess.Popen(" ".join(cmd_upload_expr), stderr=subprocess.PIPE, shell=True)
    stdout, stderr = tool_process.communicate()
    
    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        logger.info(stderr)

    
    with open("{0}/{1}".format(FINAL_DIR,FINAL_FN),'r') as et:
      eo = json.load(et)

    if 'description' in expr: expr['description'] = "{0}, coex_filter by {1}".format(expr['description'], " ".join(cmd_coex_filter))
    if 'feature_mapping' in expr:
        expr['feature_mapping'] = eo['feature_mapping']
    expr['data'] = eo['data']

    ws.save_objects({'workspace' : param['workspace_name'], 'objects' : [{'type' : 'KBaseFeatureValues.ExpressionMatrix',
                                                                          'data' : expr,
                                                                          'name' : (param['out_expr_object_name'])}]})

    ## Upload FeatureSet
    fs ={'description':'Differentially expressed genes generated by {0}'.format(" ".join(cmd_coex_filter)),
         'elements': {}}
    
    with open("{0}/{1}".format(RAWEXPR_DIR, GENELST_FN),'r') as glh:
      gl = glh.readlines()
    gl = [x.strip('\n') for x in gl]

    for g in gl:
      if 'genome_ref' in expr:
        fs['elements'][g] = [expr['genome_ref']]
      else:
        fs['elements'][g] = []

    ws.save_objects({'workspace' : param['workspace_name'], 'objects' : [{'type' : 'KBaseCollections.FeatureSet',
                                                                          'data' : fs,
                                                                          'name' : (param['out_fs_object_name'])}]})
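Example #18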
    def filter_BlastOutput(self, ctx, params):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN filter_BlastOutput
        user_token=ctx['token']
        ws_client=Workspace(url=self.__WS_URL, token=user_token)
        blast_outputs=ws_client.get_objects([{'name':params['in_id'], 
                                              'workspace': params['ws_id']}])

            

        fs ={'elements': {}}
        fs['description'] = "FeatureSet from BlastOutput by "
        printedEvalue = False
        printedEntries = False
        if 'evalue' in params and params['evalue'] != "":
            fs['description'] += " E-value:{0}".format(params['evalue'])
            printedEvalue = True
        if 'entries' in params and (params['entries'] != "" or params['entries'] > 0):
            if(printedEvalue): fs['description'] += ","
            fs['description'] += " # of entries :{0}".format(params['entries'])
            printedEntries = True
        if not printedEvalue and not printedEntries:
            fs['description'] += "no filtering"
        
        if len(blast_outputs) != 1:
            fs['description'] = "No such blast output object was found : {0}/{1}".format(param['workspace_name'], param['object_name'])
        else:
            fm = {}
            f2g = {}
            for boid in blast_outputs[0]['data']['BlastOutput_iterations']['Iteration']:
                for hitd in boid['Iteration_hits']['Hit']:
                    print hitd['Hit_def']
                    ali = hitd['Hit_def'].find('#')
                    if ali < 0: continue
                    fid = hitd['Hit_def'][0:ali]
                    gri = hitd['Hit_def'].find('#', ali+1)
                    if fid not in f2g: f2g[fid] = {}
                    if (gri >=  0 and not gri == (ali+1)): 
                        grid = hitd['Hit_def'][(ali+1):gri]
                        f2g[fid][grid] = 1
                    for hspd in hitd['Hit_hsps']['Hsp']:
                        if fid in fm:
                            if float(hspd['Hsp_evalue']) < fm[fid]:
                                fm[fid] = float(hspd['Hsp_evalue'])
                        else: fm[fid] = float(hspd['Hsp_evalue'])
           
            fms = sorted(fm.items(), key=lambda x: x[1], reverse=False)
            bol = len(fms)
            if params['entries'] != "" or int(params['entries']) > 0:
                if(int(params['entries']) < bol):
                    bol = int(params['entries'])
            for i in range(bol):
                if(fms[i][1] > float(params['evalue'])): break
                if fms[i][0] in f2g:
                    fs['elements'][fms[i][0]] = f2g[fms[i][0]].keys()
                else:
                    fs['elements'][fms[i][0]] = []

        ws_client.save_objects(
            {"workspace":params['ws_id'],
            "objects": [{
                "type":"KBaseCollections.FeatureSet",
                "data":fs,
                "name":params['out_id']}
            ]})

        #pprint(fs)
        returnVal = {'obj_name' : params['out_id'], 'ws_id' : params['ws_id']}

        #END filter_BlastOutput

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method filter_BlastOutput return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
Example #19
    def create_expression_matrix(self, ctx, expressionMatrixParams):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_expression_matrix


        params    = expressionMatrixParams
        returnVal = params['ws_expression_matrix_id']
        #Set up workspace client
        user_token = ctx['token']
        workspace = params['workspace_name']
        ws_client  = Workspace(url=self.__WS_URL, token=user_token)

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : params['ws_cuffdiff_id'],
            'workspace' : params['workspace_name']
            }])

        # Check if workspace has data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = join (self.__SCRATCH , "cuffdiffData/cuffdiff")
        cuffdiff_dir = script_util2.extract_cuffdiff_data (self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        if (cuffdiff_dir is False):
            return returnVal

        # Run R script to get fpkmgenematrix.R

        # Prepare output object.
        outjson = False
        #outjson = "repfpkmgenematrix.R.matrix.txt.json";

        if params['include_replicates'] == 0:
            scriptfile = "fpkmgenematrix.R"
        else:
            scriptfile = "repfpkmgenematrix.R"
        outjson = script_util2.generate_and_upload_expression_matrix(
            self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS, scriptfile,
            self.__SHOCK_URL, self.__HS_URL, user_token,
            cuffdiff_dir, self.__WS_URL, workspace)

        if outjson is False:
            self.__LOGGER.info("Creation of expression matrix failed")
            return returnVal
        with open("{0}/{1}".format(self.__SCRATCH , outjson),'r') as et:
                  eo = json.load(et)
        eo['type']='untransformed'
        genome_ref = s_res[0]['data']['genome_id']
        #eo['genome_ref'] = genome_ref

        self.__LOGGER.info(workspace + self.__SCRATCH + outjson + params['ws_expression_matrix_id'])
        ws_client.save_objects({'workspace' : workspace,
            'objects' : [{ 'type' : 'KBaseFeatureValues.ExpressionMatrix',
                           'data' : eo,
                           'name' : params['ws_expression_matrix_id']
                        }]})


        #END create_expression_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method create_expression_matrix return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
Example #20
    def generate_cummerbund_plots(self, ctx, cummerbundParams):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN generate_cummerbund_plots

        params    = cummerbundParams
        returnVal = params['ws_cummerbund_output']

        #Set up workspace client
        user_token = ctx['token']
        ws_client  = Workspace(url=self.__WS_URL, token=user_token)

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : params['ws_cuffdiff_id'],
            'workspace' : params['workspace_name']
            }])

        # Check if workspace has data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = script_util2.extract_cuffdiff_data (self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        if (cuffdiff_dir is False):
            return returnVal

        # Run R script to run cummerbund json and update the cummerbund output json file
        # Prepare output object.
        outputobject=dict()

        # Prepare output plot list
        cummerbundplotset=[]

        # List of plots to generate
        plotlist = [
                { 'file': "dispersionplot.R",
                  'title': "Dispersion plot",
                  'description': "Dispersion plot is the quality measure of the data. It estimates deviation from threshold against counts in FPKM." },


                { 'file': "fpkmscvplot.R",
                  'title': "Genes CV plot",
                  'description': "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data." },

                { 'file': "isoformscvplot.R",
                  'title': "Isoform CV plot",
                  'description': "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data.Differences in CV2 can result in lower numbers of differentially expressed isoforms due to a higher degree of variability between replicate fpkm estimates." },

                { 'file': "densityplot.R",
                  'title': "Density plot",
                  'description': "The density plot shows the distribution of FPKM scores across samples" },

                { 'file': "csdensityrepplot.R",
                  'title': "Replicates density plot",
                  'description': "The replicates density plot shows the distribution of FPKM scores across sample replicates" },

                { 'file': "boxplot.R",
                  'title': "Box plots",
                  'description': "The box plots show the FPKM distribution across samples." },

                { 'file': "boxrepplot.R",
                  'title': "Box plots of replicates",
                  'description': "The box plots of replicates show the FPKM distribution across sample replicates." },

                { 'file': "pairwisescatterplots.R",
                  'title': "Pairwise scatter plots",
                  'description': "The scatterplots show differences in gene expression between two samples. If two samples are identical, all genes will fall on the mid-line." },

                 { 'file': "volcanomatrixplot.R",
                  'title': "Volcano matrix plots",
                  'description': "Volcano matrix plot is a scatter plot that also identifies differentially expressed genes (by color) between samples based on log2 fold change cut off." },

                { 'file': "pcaplot.R",
                  'title': "PCA plot",
                  'description': "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions." },

                { 'file': "pcarepplot.R",
                  'title': "PCA plot including replicates",
                  'description': "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions including replicates." },

                { 'file': "mdsplot.R",
                  'title': "Multi-dimensional scaling plot",
                  'description': "Multi-dimensional scaling plots are similar to PCA plots and useful for determining the major sources of variation in the dataset. " },

                { 'file': "mdsrepplot.R",
                  'title': "Multi-dimensional scaling plot including replicates",
                  'description': "Multi-dimensional scaling plot including replicates are  similar to PCA plots and useful for determining the major sources of variation in the dataset with replicates. These can be useful to determine any systematic bias that may be present between conditions." }
            ]

#TODO.. Giving Rplot.pdf
#                { 'file': "dendrogramplot.R",
#                  'title': "Dendrogram",
#                  'description': "Dendrogram  based on the JS (Jensen-Shannon divergence) distance" },
#
#                { 'file': "dendrogramrepplot.R",
#                  'title': "Dendrogram including replicates",
#                  'description': "Dendrogram including replicates based on the JS (Jensen-Shannon divergence) distance" },


        # Iterate through the plotlist and generate the images and json files.
        for plot in plotlist:
            status = script_util2.rplotandupload(self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS,
                plot['file'], self.__SHOCK_URL, self.__HS_URL, user_token,
                cummerbundplotset, plot['title'], plot['description'], cuffdiff_dir)
            if status == False:
                self.__LOGGER.info("Problem generating image and json file - " + plot["file"])


        # Populate the output object
        outputobject['cummerbundplotSet'] = cummerbundplotset

        #TODO: Need to figure out how to get rnaseq experiment id
        outputobject['rnaseq_experiment_id'] = "rnaseq_experiment_id"
        outputobject['cuffdiff_input_id'] = params['ws_cuffdiff_id']

        res = ws_client.save_objects({
            "workspace":params['workspace_name'],
            "objects": [{
                "type":"KBaseRNASeq.cummerbund_output",
                "data":outputobject,
                "name":params["ws_cummerbund_output"]}]
            })

        #END generate_cummerbund_plots

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method generate_cummerbund_plots return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
Example #21
    def create_expression_matrix(self, ctx, expressionMatrixParams):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_expression_matrix

        params = expressionMatrixParams
        returnVal = params['ws_expression_matrix_id']
        #Set up workspace client
        user_token = ctx['token']
        workspace = params['workspace_name']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name': params['ws_cuffdiff_id'],
            'workspace': params['workspace_name']
        }])

        # Check if workspace has data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = join(self.__SCRATCH, "cuffdiffData/cuffdiff")
        cuffdiff_dir = script_util2.extract_cuffdiff_data(
            self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        if (cuffdiff_dir is False):
            return returnVal

        # Run R script to get fpkmgenematrix.R

        # Prepare output object.
        outjson = False
        #outjson = "repfpkmgenematrix.R.matrix.txt.json";

        if params['include_replicates'] == 0:
            scriptfile = "fpkmgenematrix.R"
            outjson = script_util2.generate_and_upload_expression_matrix(
                self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS, scriptfile,
                self.__SHOCK_URL, self.__HS_URL, user_token, cuffdiff_dir,
                self.__WS_URL, workspace)

        else:
            scriptfile = "repfpkmgenematrix.R"
            outjson = script_util2.generate_and_upload_expression_matrix(
                self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS, scriptfile,
                self.__SHOCK_URL, self.__HS_URL, user_token, cuffdiff_dir,
                self.__WS_URL, workspace)

        if outjson is False:
            self.__LOGGER.info("Creation of expression matrix failed")
            return returnVal
        with open("{0}/{1}".format(self.__SCRATCH, outjson), 'r') as et:
            eo = json.load(et)
        eo['type'] = 'untransformed'
        genome_ref = s_res[0]['data']['genome_id']
        #eo['genome_ref'] = genome_ref

        self.__LOGGER.info(workspace + self.__SCRATCH + outjson +
                           params['ws_expression_matrix_id'])
        ws_client.save_objects({
            'workspace':
            workspace,
            'objects': [{
                'type': 'KBaseFeatureValues.ExpressionMatrix',
                'data': eo,
                'name': params['ws_expression_matrix_id']
            }]
        })

        #END create_expression_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method create_expression_matrix return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
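A minimal invocation sketch for create_expression_matrix, assuming `impl` is an instance of the service implementation class and `ctx` is a call context carrying the auth token; these two names and all workspace/object names below are placeholders, not part of the original module.

# Hypothetical caller; `impl`, `ctx`, and the names below are placeholders.
params = {
    'workspace_name': 'my_workspace',         # workspace holding the cuffdiff object
    'ws_cuffdiff_id': 'my_cuffdiff_object',   # input cuffdiff workspace object
    'ws_expression_matrix_id': 'my_matrix',   # name for the matrix to be saved
    'include_replicates': 1                   # 1 selects repfpkmgenematrix.R
}
result = impl.create_expression_matrix(ctx, params)
print(result[0])  # echoes params['ws_expression_matrix_id'] on success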
Example #22
            'remote_md5':reverse_shock_file['file']['checksum']['md5']
        },
        'encoding':'UTF8',
        'type':'fastq',
        'size':reverse_shock_file['file']['size']

    },
    'interleaved':0,
    'sequencing_tech':'artificial reads'
}

ws = Workspace(WORKSPACE_URL, token=token)
new_obj_info = ws.save_objects({
                'workspace':'msneddon:1448037540898',
                'objects':[
                    {
                        'type':'KBaseFile.PairedEndLibrary',
                        'data':paired_end_library,
                        'name':'test.reads',
                        'meta':{},
                        'provenance':[
                            {
                                'service':'MegaHit',
                                'method':'test_megahit'
                            }
                        ]
                    }]
                })
pprint(new_obj_info)
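save_objects returns one object_info tuple per saved object; the index convention (0 = object id, 1 = name, 4 = version, 6 = workspace id) matches the report_info[6]/report_info[0]/report_info[4] usage in the report-saving examples elsewhere on this page. A short sketch for turning the result above into a canonical reference string:

# Build a "wsid/objid/version" reference from the returned object_info tuple.
info = new_obj_info[0]
obj_ref = '{0}/{1}/{2}'.format(info[6], info[0], info[4])
print('saved object ref: ' + obj_ref)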

Example #23
    def calculate(self, ctx, input):
        # ctx is the context object
        # return variables are: output
        #BEGIN calculate
        ''' Compute reaction probabilities from a probabilistic annotation.

            The input dictionary must contain the following keys:
            probanno: Name of ProbAnno object to input
            probanno_workspace: Workspace from which to grab the ProbAnno object
            rxnprobs: Name of RxnProbs object
            rxnprobs_workspace: Workspace to which to save the RxnProbs object

            The following keys are optional:
            verbose: Print lots of messages on the progress of the algorithm
            template_model: Name of TemplateModel object
            template_workspace: Workspace from which to grab TemplateModel object

            @param ctx Current context object
            @param input Dictionary with input parameters for function
            @return Object info for RxnProbs object
            @raise WrongVersionError when ProbAnno object version number is invalid
            @raise ValueError when template_workspace input argument is not specified
        '''

        # Sanity check on input arguments
        input = self._checkInputArguments(ctx, input, 
                                          ["probanno", "probanno_workspace", "rxnprobs", "rxnprobs_workspace"], 
                                          { "verbose" : False ,
                                            "template_model" : None,
                                            "template_workspace" : None
                                          }
                                         )

        # Make sure the static database files are ready.
        self._checkDatabaseFiles(ctx)

        # Set log level to DEBUG when verbose parameter is enabled.
        if input['verbose']:
            ctx.set_log_level(log.DEBUG)
        
        # Create a workspace client.
        wsClient = Workspace(self.config["workspace_url"], token=ctx['token'])
        
        # Get the ProbAnno object from the specified workspace.
        probannoObjectId = make_object_identity(input["probanno_workspace"], input["probanno"])
        objectList = wsClient.get_objects( [ probannoObjectId ] )
        probannoObject = objectList[0]
        if probannoObject['info'][2] != ProbAnnoType:
            message = "ProbAnno object type %s is not %s for object %s" %(probannoObject['info'][2], ProbAnnoType, probannoObject['info'][1])
            ctx.log_err(message)
            raise WrongVersionError(message)
        genome = probannoObject["data"]["genome"]
        
        # Create a temporary directory for storing intermediate files when debug is turned on.
        if ctx.get_log_level() >= log.DEBUG2:
            workFolder = tempfile.mkdtemp("", "calculate-%s-" %(genome), self.config["work_folder_path"])
            ctx.log_debug('Intermediate files saved in '+workFolder)
        else:
            workFolder = None

        # When a template model is specified, use it to build dictionaries for roles,
        # complexes, and reactions instead of retrieving from static database files.
        complexesToRoles = None
        reactionsToComplexes = None
        if input["template_model"] is not None or input["template_workspace"] is not None:
            if input["template_model"] is None or input["template_workspace"] is None:
                message = "Template model workspace is required if template model ID is provided"
                ctx.log_err(message)
                raise ValueError(message)

            # Create a dictionary to map a complex to a list of roles and a dictionary
            # to map a reaction to a list of complexes.  The dictionaries are specific to
            # the specified template model instead of covering everything in the central
            # data model.
            complexesToRoles = dict()
            reactionsToComplexes = dict()

            # Get the list of RoleComplexReactions for the template model from the
            # fba modeling service.  The RoleComplexReactions structure has a list
            # of ComplexReactions structures for the given role.  And each ComplexReactions
            # structure has a list of reactions for the given complex.
            fbaClient = fbaModelServices(self.config['fbamodeling_url'], token=ctx['token'])
            roleComplexReactionsList = fbaClient.role_to_reactions( { 'templateModel': input['template_model'], 'workspace': input['template_workspace'] } )

            # Build the two dictionaries from the returned list.
            for rcr in roleComplexReactionsList:
                for complex in rcr['complexes']:
                    complexId = re.sub(r'cpx0*(\d+)', r'kb|cpx.\1', complex['name']) # Convert ModelSEED format to KBase format
                    if complexId in complexesToRoles:
                        complexesToRoles[complexId].append(rcr['name'])
                    else:
                        complexesToRoles[complexId] = [ rcr['name'] ]
                    for reaction in complex['reactions']:
                        reactionId = reaction['reaction']
                        if reactionId in reactionsToComplexes:
                            reactionsToComplexes[reactionId].append(complexId)
                        else:
                            reactionsToComplexes[reactionId] = [ complexId ]

        # Calculate per-gene role probabilities.
        roleProbs = self._rolesetProbabilitiesToRoleProbabilities(ctx, input, genome, probannoObject["data"]["roleset_probabilities"], workFolder)

        # Calculate whole cell role probabilities.
        # Note - eventually workFolder will be replaced with a rolesToReactions call
        totalRoleProbs = self._totalRoleProbabilities(ctx, input, genome, roleProbs, workFolder)

        # Calculate complex probabilities.
        complexProbs = self._complexProbabilities(ctx, input, genome, totalRoleProbs, workFolder, complexesToRequiredRoles = complexesToRoles)

        # Calculate reaction probabilities.
        reactionProbs = self._reactionProbabilities(ctx, input, genome, complexProbs, workFolder, rxnsToComplexes = reactionsToComplexes)

        # If the reaction probabilities were not calculated using the data from the fba modeling service
        # via the template model, we need to convert from the KBase ID format to the ModelSEED format.
        if input["template_model"] is None:
            reactionList = list()
            for index in range(len(reactionProbs)):
                reactionList.append(reactionProbs[index][0])
            EntityAPI = CDMI_EntityAPI(self.config["cdmi_url"])
            # Retry the central data model lookup a few times in case of transient HTTP errors.
            reactionData = None
            numAttempts = 4
            while numAttempts > 0:
                try:
                    numAttempts -= 1
                    reactionData = EntityAPI.get_entity_Reaction( reactionList, [ "source_id" ] )
                    if len(reactionList) == len(reactionData):
                        numAttempts = 0
                except HTTPError as e:
                    pass
            for index in range(len(reactionProbs)):
                rxnId = reactionProbs[index][0]
                reactionProbs[index][0] = reactionData[rxnId]['source_id']
 
        # Create a reaction probability object
        objectData = dict()
        objectData["genome"] = probannoObject["data"]["genome"]
        objectData['genome_workspace'] = probannoObject['data']['genome_workspace']
        if input["template_model"] is None:
            objectData['template_model'] = 'None'
        else:
            objectData["template_model"] = input["template_model"]
        if input["template_workspace"] is None:
            objectData['template_workspace'] = 'None'
        else:
            objectData["template_workspace"] = input["template_workspace"]
        objectData["probanno"] = input['probanno']
        objectData['probanno_workspace'] = input['probanno_workspace']
        objectData["id"] = input["rxnprobs"]
        objectData["reaction_probabilities"] = reactionProbs

        objectMetaData = { "num_reaction_probs": len(objectData["reaction_probabilities"]) }
        objectProvData = dict()
        objectProvData['time'] = timestamp(0)
        objectProvData['service'] = os.environ['KB_SERVICE_NAME']
        objectProvData['service_ver'] = ServiceVersion
        objectProvData['method'] = 'calculate'
        objectProvData['method_params'] = input.items()
        objectProvData['input_ws_objects'] = [ '%s/%s/%d' %(probannoObject['info'][7], probannoObject['info'][1], probannoObject['info'][4]) ]
        objectSaveData = dict()
        objectSaveData['type'] = RxnProbsType
        objectSaveData['name'] = input["rxnprobs"]
        objectSaveData['data'] = objectData
        objectSaveData['meta'] = objectMetaData
        objectSaveData['provenance'] = [ objectProvData ]
        objectInfo = wsClient.save_objects( { 'workspace': input["rxnprobs_workspace"], 'objects': [ objectSaveData ] } )
        output = objectInfo[0]
        
        #END calculate

        # At some point might do deeper type checking...
        if not isinstance(output, list):
            raise ValueError('Method calculate return value ' +
                             'output is not type list as required.')
        # return the results
        return [output]
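A hedged sketch of calling calculate with the four required keys from the docstring; `impl` and `ctx` stand in for the service instance and call context, and the object names are placeholders.

# Placeholder input; the required keys are listed in the docstring above.
input = {
    'probanno': 'my_probanno',
    'probanno_workspace': 'my_workspace',
    'rxnprobs': 'my_rxnprobs',
    'rxnprobs_workspace': 'my_workspace',
    'verbose': True  # optional: raises the log level for progress messages
}
output = impl.calculate(ctx, input)
print(output[0])  # object info for the saved RxnProbs object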
Example #24
    def setUp(cls):

        token = environ.get('KB_AUTH_TOKEN', None)

        if token is None:
            sys.stderr.write(
                "Error: Unable to run tests without authentication token!\n")
            sys.exit(1)

        with open('ltest/script_test/token.txt', 'w') as token_file:
            token_file.write(token)

        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('CoExpression'):
            cls.cfg[nameval[0]] = nameval[1]
        auth_service_url = cls.cfg.get(
            'auth-service-url',
            "https://kbase.us/services/authorization/Sessions/Login")
        ws_url = cls.cfg['ws_url']
        auth_service_url_allow_insecure = cls.cfg[
            'auth-service-url-allow-insecure']
        auth_client = _KBaseAuth(auth_service_url)
        user_id = auth_client.get_user(token)

        ws = Workspace(
            url=ws_url,
            token=token,
            auth_svc=auth_service_url,
            trust_all_ssl_certificates=auth_service_url_allow_insecure)

        # update input data in reverse order of references
        ordered_file_list = [
            INPUT_META_DATA_DIR + '/test_diff_p_distribution_input_ref2.json',
            INPUT_META_DATA_DIR + '/test_diff_p_distribution_input_ref1.json',
            INPUT_META_DATA_DIR + '/test_diff_p_distribution_input.json',
            INPUT_META_DATA_DIR + '/test_view_heatmap_input_ref1.json',
            INPUT_META_DATA_DIR + '/test_view_heatmap_input.json',
            INPUT_META_DATA_DIR + '/test_coex_clust_input.json',
            INPUT_META_DATA_DIR + '/test_filter_genes_input.json'
        ]

        for filename in ordered_file_list:
            with open(filename, 'r') as infile:
                input_meta_data = json.load(infile)

            # create workspace that is local to the user if it does not exist
            workspace_name_t = Template(
                str(input_meta_data['params'][0]['workspace_name']))
            workspace_name = workspace_name_t.substitute(user_id=user_id)
            print('workspace_name: ' + workspace_name)

            try:
                ws_info = ws.get_workspace_info({'workspace': workspace_name})
                print("workspace already exists: " + str(ws_info))
            except:
                ws_info = ws.create_workspace({
                    'workspace':
                    workspace_name,
                    'description':
                    'Workspace for ' + str(input_meta_data['method'])
                })
                print("Created new workspace: " + str(ws_info))

            print('reading input file: ' + filename)
            object_name = str(input_meta_data['params'][0]['object_name'])
            print('object_name: ' + object_name)

            input_data_filename = INPUT_DATA_DIR + '/' + object_name + '.json'
            print('input data filename: ' + input_data_filename)

            with open(input_data_filename, 'r') as infile:
                input_data = json.load(infile)

            # update workspace name in input data
            input_data_str = json.dumps(input_data)
            input_data_t = Template(input_data_str)
            input_data_str = input_data_t.substitute(
                workspace_name=workspace_name)
            input_data = json.loads(input_data_str)

            print('type: ' + input_data[0]['info'][2])

            #upload data (no effect if data already exists in workspace)
            print('uploading input data to workspace')
            ws.save_objects({
                'workspace':
                workspace_name,
                'objects': [{
                    'type': input_data[0]['info'][2],
                    'data': input_data[0]['data'],
                    'name': object_name
                }]
            })
        print('ws objects: ' +
              str(ws.list_objects({'workspaces': [workspace_name]})))
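This setup never removes the workspaces it creates. If cleanup were wanted, a tearDownClass sketch could call the workspace service's delete_workspace method; the created_workspaces list here is an assumption (it would have to be recorded during setup).

    @classmethod
    def tearDownClass(cls):
        # Sketch only: assumes setup recorded workspace names in cls.created_workspaces.
        ws = Workspace(url=cls.cfg['ws_url'], token=environ.get('KB_AUTH_TOKEN'))
        for workspace_name in getattr(cls, 'created_workspaces', []):
            try:
                ws.delete_workspace({'workspace': workspace_name})
            except Exception:
                pass  # workspace may already be deleted or owned by another user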
Example #25
    parser.add_argument('-o', '--out_id', help='Output workspace object name', action='store', dest='outobj_id', default=None, required=True)

    parser.add_argument('-l', '--support_dir', help='Support directory', action='store', dest='sdir', default='lib', required=True)
    parser.add_argument('-g', '--out_file', help='Output prefix or file name', action='store', dest='otmp', default='outfile', required=True)
    # for meta data
    parser.add_argument('-i', '--in_id', help='Input Shock node id for meta', action='store', dest='inobj_id', default='NotProvided', required=True)
    parser.add_argument('-e', '--ext_type', help='External object type', action='store', dest='etype', default=None, required=True)
    parser.add_argument('-j', '--job_id', help='UJS job id', action='store', dest='jid', default='NoJodID', required=False)

    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()

    
    kb_token = os.environ.get('KB_AUTH_TOKEN')

    ## main loop
    jif = open("{}/{}".format(args.sdir,args.otmp, 'r'))
    data = json.loads(jif.read())
    jif.close()
    
    wsd = Workspace(url=args.ws_url, token=kb_token)
    wsd.save_objects({'workspace':args.ws_id, 'objects' : [ {
      'type' : 'Transform.Pair', 'data' : data, 'name' : args.outobj_id, 
      'meta' : { 'source_id' : args.inobj_id, 'source_type' : args.etype,
                 'ujs_job_id' : args.jid} } ]})
    

    exit(0)
Example #26
    def create_interactive_heatmap_de_genes_old(self, ctx, heatmapParams):
        """
        :param heatmapParams: instance of type "heatmapParams" -> structure:
           parameter "workspace" of String, parameter "sample1" of String,
           parameter "sample2" of String, parameter "q_value_cutoff" of
           Double, parameter "log2_fold_change_cutoff" of Double, parameter
           "num_genes" of Long, parameter "ws_cuffdiff_id" of type
           "ws_cuffdiff_id" (@id ws
           KBaseRNASeq.RNASeqCuffdiffdifferentialExpression), parameter
           "ws_expression_matrix_id" of type "ws_expression_matrix_id" (@id
           ws KBaseFeatureValues.ExpressionMatrix), parameter
           "ws_cummerbund_output" of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output)
        :returns: instance of type "ResultsToReport" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_interactive_heatmap_de_genes_old
        fparams = heatmapParams
        #returnVal = "ttt"
        #Set up workspace client
        user_token = ctx['token']
        workspace = fparams['workspace']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        system_params = {}
        system_params['token'] = user_token
        system_params['ws_url'] = self.__WS_URL
        system_params['logger'] = self.__LOGGER
        system_params['shock_url'] = self.__SHOCK_URL
        system_params['hs_url'] = self.__HS_URL
        system_params['scratch'] = self.__SCRATCH
        system_params['rscripts'] = self.__RSCRIPTS
        system_params['workspace'] = workspace

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : fparams['ws_cuffdiff_id'],
            'workspace' : fparams['workspace']
            }])

        # Check if workspace has data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal
        cuffdiff_dir = join (self.__SCRATCH , "cuffdiffData/cuffdiff")
        cuffdiff_dir = script_util2.extract_cuffdiff_data (self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)
        #cuffdiff_dir = "/kb/module/work/cuffdiffData/cuffdiff"
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)


        #if (cuffdiff_dir is False):
        #    return returnVal
        fparams['cuffdiff_dir'] = cuffdiff_dir
        fparams['infile'] = join (cuffdiff_dir, "gene_exp.diff")
        fparams['outfile'] = join(system_params['scratch'],  "gene_exp.diff.filter")


        fparams['pairs'] = 1
        fparams['logModetmp'] = 2



        rparams = {}
        
        rparams['cuffdiff_dir'] = fparams['cuffdiff_dir']
        rparams['outpng'] = join (system_params['scratch'], "heatmap.png")
        rparams['imageheight'] = 1600
        rparams['imagewidth'] = 800
        rparams['plotscript'] = join(system_params['rscripts'], "heatmapplotinteractive.R")
        rparams['include_replicates'] = 1
        rparams['pairs'] = fparams ['pairs']
        rparams['logMode'] = fparams['logModetmp']
        rparams['removezeroes'] = 1
        rparams['outmatrix'] = join (system_params['scratch'], "outmatrix")
        reportObj = {}

        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']
        # add additional info to provenance here, in this case the input data object reference
        provenance[0]['input_ws_objects'] = [workspace + '/' + fparams['ws_cuffdiff_id']]
       
        report = ""
        if (fparams['pairs'] != 0):
        
            try:
                filtered_matrix = script_util2.filter_expression_matrix(fparams, system_params)
                self.__LOGGER.info("matrix is " + filtered_matrix)
                fparams['infile'] = join(system_params['scratch'], "gene_exp.diff.filter")
                fparams['outfile'] = join(system_params['scratch'], "gene_exp.diff.filter.genelist")
                genelist_filtered_matrix_file = script_util2.get_gene_list_from_filter_step(fparams)
                rparams['genelist'] = filtered_matrix
            except:
                report += "There was an error in creating the expression matrix. "
                report += "No differentially expressed genes were found. "
                report += "Please change / double check your filtering criteria."

                reportObj = {
                    'objects_created': [],
                    'text_message': report
                }

                reportName = 'create_interactive_heatmap_de_genes_old_' + str(hex(uuid.getnode()))
                report_info = ws_client.save_objects({
                    'workspace': fparams['workspace'],
                    'objects': [{
                        'type': 'KBaseReport.Report',
                        'data': reportObj,
                        'name': reportName,
                        'meta': {},
                        'hidden': 1,  # important!  make sure the report is hidden
                        'provenance': provenance
                    }]
                })[0]
                print('saved Report: ' + pformat(report_info))

                returnVal = {
                    "report_name": reportName,
                    "report_ref": str(report_info[6]) + '/' + str(report_info[0]) + '/' + str(report_info[4])
                }

                return [returnVal]


        try:
            # Prepare output object.
            outjson = False

            roptstr_basic_heatmap_rep = script_util2.get_command_line_heatmap_basic(rparams)

            # Run R script to generate the heatmap image and json file.
            # Prepare output object.
            outputobject = dict()

            # Prepare output plot list
            cummerbundplotset = []

            # List of plots to generate
            plotlist = [
                {'roptstr': roptstr_basic_heatmap_rep,
                 'title': "Heatmap",
                 'description': "Heatmap",
                 'exp': fparams['ws_expression_matrix_id']
                 }
            ]
            fparams['cummerbundplotset'] = cummerbundplotset
            # Iterate through the plotlist and generate the images and json files.
            for plot in plotlist:
                fparams['title'] = plot['title']
                fparams['description'] = plot['description']

                status = script_util2.rplotanduploadinteractive(system_params, fparams, rparams, plot['roptstr'])
                if status == False:
                    self.__LOGGER.info("Problem generating image and json file - " + plot["roptstr"])
                    report = "Error: Please select different cutoff criteria. None of the genes passed the fold change and q-value cutoffs. "
                    report += "Failed to create an expression matrix with differentially expressed genes (" + fparams['ws_expression_matrix_id'] + "). No genes to show on the heatmap."
                    reportObj = {
                        'objects_created': [],
                        'text_message': report
                    }
                    reportName = 'create_interactive_heatmap_de_genes_old_' + str(hex(uuid.getnode()))
                    report_info = ws_client.save_objects({
                        'workspace': fparams['workspace'],
                        'objects': [{
                            'type': 'KBaseReport.Report',
                            'data': reportObj,
                            'name': reportName,
                            'meta': {},
                            'hidden': 1,  # important!  make sure the report is hidden
                            'provenance': provenance
                        }]
                    })[0]
                    print('saved Report: ' + pformat(report_info))

                    returnVal = {
                        "report_name": reportName,
                        "report_ref": str(report_info[6]) + '/' + str(report_info[0]) + '/' + str(report_info[4])
                    }

                    return [returnVal]

                else:
                    self.__LOGGER.info(status)

                    outjson = status
                    self.__LOGGER.info('5')
                    with open("{0}/{1}".format(self.__SCRATCH, outjson), 'r') as et2:
                        eo2 = json.load(et2)
                        genome_ref = s_res[0]['data']['genome_id']
                        eo2['type'] = 'log2_level'
                        eo2['genome_ref'] = genome_ref
                        self.__LOGGER.info('3')
                        self.__LOGGER.info(workspace + self.__SCRATCH + outjson + plot['exp'])
                        try:
                            res = ws_client.save_objects({'workspace': workspace,
                                                          'objects': [{'type': 'KBaseFeatureValues.ExpressionMatrix',
                                                                       'data': eo2,
                                                                       'name': plot['exp']
                                                                       }]})
                        except:
                            self.__LOGGER.error("Failed to save the expression matrix object")

        except:
            self.__LOGGER.info('6')
        report = "Successfully created expression matrix"
        reportObj = {
            'objects_created': [],
            'text_message': report
        }

        self.__LOGGER.info('7')

        reportName = 'create_interactive_heatmap_de_genes_old_' + str(hex(uuid.getnode()))
        report_info = ws_client.save_objects({
            'workspace': fparams['workspace'],
            'objects': [{
                'type': 'KBaseReport.Report',
                'data': reportObj,
                'name': reportName,
                'meta': {},
                'hidden': 1,  # important!  make sure the report is hidden
                'provenance': provenance
            }]
        })[0]
        print('saved Report: ' + pformat(report_info))

        returnVal = {
            "report_name": reportName,
            "report_ref": str(report_info[6]) + '/' + str(report_info[0]) + '/' + str(report_info[4])
        }


        #END create_interactive_heatmap_de_genes_old

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method create_interactive_heatmap_de_genes_old return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
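The report reference string str(report_info[6]) + '/' + str(report_info[0]) + '/' + str(report_info[4]) is assembled three times in the method above. A small helper, not part of the original module, would remove the repetition:

def make_report_ref(report_info):
    # object_info layout: [0] = object id, [4] = version, [6] = workspace id
    return '{0}/{1}/{2}'.format(report_info[6], report_info[0], report_info[4])

Each returnVal could then be built as {'report_name': reportName, 'report_ref': make_report_ref(report_info)}.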
Example #27
    def generate_cummerbund_plot2(self, ctx, cummerbundstatParams):
        """
        :param cummerbundstatParams: instance of type "cummerbundstatParams"
           -> structure: parameter "workspace" of String, parameter
           "ws_cuffdiff_id" of type "ws_cuffdiff_id" (@id ws
           KBaseRNASeq.RNASeqCuffdiffdifferentialExpression), parameter
           "ws_cummerbund_output" of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output), parameter "ws_diffstat_output" of
           type "ws_diffstat_output" (Differential stat workspace id)
        :returns: instance of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN generate_cummerbund_plot2
        params    = cummerbundstatParams
        returnVal = params['ws_cummerbund_output']

        #Set up workspace client
        user_token = ctx['token']
        ws_client  = Workspace(url=self.__WS_URL, token=user_token)


        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : params['ws_cuffdiff_id'],
            'workspace' : params['workspace']
            }])
        print "Getting genome info"

        genome_ref = s_res[0]['data']['genome_id']
        #genome_ref = '2702/6/2'
        #genome_ref = '2702/26/1'
        #genome_ref = '2229/21/10'
        print genome_ref
        gaapi = GenomeAnnotationAPI(self.callbackURL, token=user_token)
        genome = gaapi.get_genome_v1({"genomes": [{"ref": genome_ref}],
                                          "included_fields": ["scientific_name"],
                                          "included_feature_fields": ["id", "function", "type"
                                                                      ]})["genomes"][0]["data"]
        genome_dict = {}
        features = genome['features']
        for feature in features:
            id = feature['id']
            try:
                function = feature['function']
                if not function:
                    function = 'Unknown'
            except:
                function = 'Unknown'
            genome_dict[id] = function


        # Check if workspace has data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = script_util2.extract_cuffdiff_data (self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        if (cuffdiff_dir is False):
            return returnVal

        # Run R script to generate cummerbund plots and update the output json file
        # Prepare output object.
        outputobject = dict()

        # Prepare output plot list
        cummerbundplotset = []
        # List of plots to generate
        plotlist = [
                { 'file': "dispersionplot.R",
                  'title': "Dispersion plot",
                  'description': "Dispersion plot is the quality measure of the data. It estimates deviation from threshold against counts in FPKM." },


                { 'file': "fpkmscvplot.R",
                  'title': "Genes CV plot",
                  'description': "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data." },

                { 'file': "isoformscvplot.R",
                  'title': "Isoform CV plot",
                  'description': "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data. Differences in CV2 can result in lower numbers of differentially expressed isoforms due to a higher degree of variability between replicate FPKM estimates." },

                { 'file': "densityplot.R",
                  'title': "Density plot",
                  'description': "The density plot shows the distribution of FPKM scores across samples." },

                { 'file': "csdensityrepplot.R",
                  'title': "Replicates density plot",
                  'description': "The replicates density plot shows the distribution of FPKM scores across sample replicates" },

                { 'file': "boxplot.R",
                  'title': "Box plots",
                  'description': "The box plots show the FPKM distribution across samples." },

                { 'file': "boxrepplot.R",
                  'title': "Box plots of replicates",
                  'description': "The box plots of replicates show the FPKM distribution across sample replicates." },

                { 'file': "pairwisescatterplots.R",
                  'title': "Pairwise scatter plots",
                  'description': "The scatterplots show differences in gene expression between two samples. If two samples are identical, all genes will fall on the mid-line." },

                 { 'file': "volcanomatrixplot.R",
                  'title': "Volcano matrix plots",
                  'description': "Volcano matrix plot is a scatter plot that also identifies differentially expressed genes (by color) between samples based on log2 fold change cut off." },

                { 'file': "pcaplot.R",
                  'title': "PCA plot",
                  'description': "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring the relationship between sample conditions." },

                { 'file': "pcarepplot.R",
                  'title': "PCA plot including replicates",
                  'description': "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring the relationship between sample conditions including replicates." },

                { 'file': "mdsplot.R",
                  'title': "Multi-dimensional scaling plot",
                  'description': "Multi-dimensional scaling plots are similar to PCA plots and useful for determining the major sources of variation in the dataset. " },

                { 'file': "mdsrepplot.R",
                  'title': "Multi-dimensional scaling plot including replicates",
                  'description': "Multi-dimensional scaling plots including replicates are similar to PCA plots and useful for determining the major sources of variation in the dataset with replicates. These can be useful to determine any systematic bias that may be present between conditions." }
            ]


        # Iterate through the plotlist and generate the images and json files.
        for plot in plotlist:
            status = script_util2.rplotandupload(self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS,
                plot['file'], self.__SHOCK_URL, self.__HS_URL, user_token,
                cummerbundplotset, plot['title'], plot['description'], cuffdiff_dir)
            if status == False:
                self.__LOGGER.info("Problem generating image and json file - " + plot["file"])


        # Populate the output object
        outputobject['cummerbundplotSet'] = cummerbundplotset

        #TODO: Need to figure out how to get rnaseq experiment id
        outputobject['rnaseq_experiment_id'] = "rnaseq_experiment_id"
        outputobject['cuffdiff_input_id'] = params['ws_cuffdiff_id']

        res = ws_client.save_objects({
            "workspace":params['workspace'],
            "objects": [{
                "type":"KBaseRNASeq.cummerbund_output",
                "data":outputobject,
                "name":params["ws_cummerbund_output"]}]
            })

        infile = join(cuffdiff_dir, "gene_exp.diff")
        outfile = join(cuffdiff_dir, "gene_exp_diff.out")
        x = v.volcano_plot_data_parse_and_upload(infile, outfile, genome_dict)
        with open(outfile) as f:
            statdata = json.load(f)
        res = ws_client.save_objects({
            "workspace":params['workspace'],
            "objects": [{
                "type":"KBaseRNASeq.DifferentialExpressionStat",
                "data":statdata,
                "name":params["ws_diffstat_output"]}]
            })

        #END generate_cummerbund_plot2

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method generate_cummerbund_plot2 return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
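Following the type structure in the docstring, a hypothetical caller would pass the workspace plus the three object names; `impl` and `ctx` are placeholders as in the other sketches on this page.

params = {
    'workspace': 'my_workspace',
    'ws_cuffdiff_id': 'my_cuffdiff_object',        # input cuffdiff object
    'ws_cummerbund_output': 'my_cummerbund_out',   # plots object to be saved
    'ws_diffstat_output': 'my_diffstat'            # differential stat object to be saved
}
result = impl.generate_cummerbund_plot2(ctx, params)
print(result[0])  # echoes params['ws_cummerbund_output']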
Example #28
def run_coex_cluster(workspace_service_url=None,
                     param_file=None,
                     level=logging.INFO,
                     logger=None):
    """
    Narrative Job Wrapper script to execute coex_cluster2
    
    Args:
        workspace_service_url:  A url for the KBase Workspace service 
        param_file: JSON parameter file naming the workspace and object to process
        level: Logging level, defaults to logging.INFO.
    
    Returns:
        Output is written back in WS
    
    Authors:
        Shinjae Yoo
    
    """

    try:
        os.makedirs(RAWEXPR_DIR)
    except:
        pass
    try:
        os.makedirs(CLSTR_DIR)
    except:
        pass
    try:
        os.makedirs(FINAL_DIR)
    except:
        pass

    if logger is None:
        logger = script_utils.stderrlogger(__file__)

    logger.info(
        "Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV")
    token = os.environ.get("KB_AUTH_TOKEN")

    with open(param_file) as paramh:
        param = json.load(paramh)

    from biokbase.workspace.client import Workspace
    ws = Workspace(url=workspace_service_url,
                   token=os.environ['KB_AUTH_TOKEN'])
    expr = ws.get_objects([{
        'workspace': param['workspace_name'],
        'name': param['object_name']
    }])[0]['data']

    cmd_download_cvt_tsv = [
        FVE_2_TSV, '--workspace_service_url', workspace_service_url,
        '--workspace_name', param['workspace_name'], '--object_name',
        param['object_name'], '--working_directory', RAWEXPR_DIR,
        '--output_file_name', EXPRESS_FN
    ]

    # need shell in this case because the java code depends on finding the KBase token in the environment
    #  -- copied from FVE_2_TSV
    tool_process = subprocess.Popen(" ".join(cmd_download_cvt_tsv),
                                    stderr=subprocess.PIPE,
                                    shell=True)
    stdout, stderr = tool_process.communicate()

    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        logger.info(stderr)
        #raise Exception(stderr)

    logger.info("Coexpression clustering analysis")

    ## Prepare sample file
    # detect num of columns
    with open("{0}/{1}".format(RAWEXPR_DIR, EXPRESS_FN), 'r') as f:
        fl = f.readline()
    ncol = len(fl.split('\t'))

    with open("{0}/{1}".format(RAWEXPR_DIR, SAMPLE_FN), 'wt') as s:
        s.write("0")
        for j in range(1, ncol - 1):
            s.write("\t{0}".format(j))
        s.write("\n")

    ## Run coex_cluster
    cmd_coex_cluster = [
        COEX_CLUSTER, '-t', 'y', '-i',
        "{0}/{1}".format(RAWEXPR_DIR, EXPRESS_FN), '-o',
        "{0}/{1}".format(CLSTR_DIR, CLSTR_FN)
    ]

    for p in [
            'net_method', 'minRsq', 'maxmediank', 'maxpower', 'clust_method',
            'minModuleSize', 'detectCutHeight'
    ]:
        if p in param:
            cmd_coex_cluster.append("--{0}".format(p))
            cmd_coex_cluster.append(str(param[p]))

    #sys.exit(2) #TODO: No error handling in narrative so we do graceful termination

    #if 'p_value' in param and 'num_features' in param:
    #  logger.error("Both of p_value and num_features cannot be defined together");
    #  sys.exit(3)

    tool_process = subprocess.Popen(cmd_coex_cluster, stderr=subprocess.PIPE)
    stdout, stderr = tool_process.communicate()

    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        if re.search(
                r'^There were \d+ warnings \(use warnings\(\) to see them\)',
                stderr):
            logger.info(stderr)
        else:
            logger.error(stderr)
            raise Exception(stderr)

    # build index for gene list
    pos_index = {
        expr['data']['row_ids'][i]: i
        for i in range(0, len(expr['data']['row_ids']))
    }

    # parse clustering results
    cid2genelist = {}
    with open("{0}/{1}".format(CLSTR_DIR, CLSTR_FN), 'r') as glh:
        glh.readline()  # skip header
        for line in glh:
            gene, cluster = line.replace('"', '').split("\t")
            if cluster not in cid2genelist:
                cid2genelist[cluster] = []
            cid2genelist[cluster].append(gene)

    if (len(cid2genelist) < 1):
        logger.error("Clustering failed")
        return empty_results("Error: No cluster output", expr,
                             workspace_service_url, param, logger, ws)
        #sys.exit(4)

    logger.info("Uploading the results onto WS")
    feature_clusters = []
    for cluster in cid2genelist:
        feature_clusters.append({
            "id_to_pos":
            {gene: pos_index[gene]
             for gene in cid2genelist[cluster]}
        })

    ## Upload Clusters
    feature_clusters = {
        "original_data":
        "{0}/{1}".format(param['workspace_name'], param['object_name']),
        "feature_clusters":
        feature_clusters
    }

    ws.save_objects({
        'workspace':
        param['workspace_name'],
        'objects': [{
            'type': 'KBaseFeatureValues.FeatureClusters',
            'data': feature_clusters,
            'name': (param['out_object_name'])
        }]
    })
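A hedged sketch of driving run_coex_cluster; the parameter file contents are an assumption, mirroring the keys the function reads (workspace_name, object_name, out_object_name, plus the optional clustering options).

# Assumed parameter file, e.g. coex_cluster_params.json:
# {"workspace_name": "my_workspace", "object_name": "my_expression_matrix",
#  "out_object_name": "my_clusters", "minModuleSize": 30}
run_coex_cluster(workspace_service_url='https://kbase.us/services/ws',
                 param_file='coex_cluster_params.json',
                 level=logging.INFO)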
Example #29
def mys_example(args):
    ###
    # download ws object and convert them to csv
    wsd = Workspace(url=args.ws_url, token=os.environ.get('KB_AUTH_TOKEN'))
    indata = wsd.get_object({
        'id': args.inobj_id,
        #'type' : 'KBaseExpression.ExpressionSeries',
        'workspace': args.ws_id
    })['data']

    if indata is None:
        raise Exception("Object {} not found in workspace {}".format(
            args.inobj_id, args.ws_id))

    ###
    # execute filtering
    flt_cmd_lst = [
        'mys_example', "-i", "{}-{}".format(os.getpid(), args.exp_fn)
    ]
    if (args.method is not None):
        flt_cmd_lst.append('-m')
        flt_cmd_lst.append(args.method)
    if (args.p_value is not None):
        flt_cmd_lst.append('-p')
        flt_cmd_lst.append(args.p_value)
    if (args.num_genes is not None):
        flt_cmd_lst.append('-n')
        flt_cmd_lst.append(args.num_genes)
    if (args.flt_out_fn is not None):
        flt_cmd_lst.append('-o')
        flt_cmd_lst.append("{}-{}".format(os.getpid(), args.flt_out_fn))

    p1 = Popen(flt_cmd_lst, stdout=PIPE)
    out_str = p1.communicate()
    # print output message for error tracking
    if out_str[0] is not None: print out_str[0]
    if out_str[1] is not None: print >> sys.stderr, out_str[1]
    flt_cmd = " ".join(flt_cmd_lst)

    ###
    # put it back to workspace
    #fif = open("{}-{}".format(os.getpid(),args.flt_out_fn), 'r')
    #fif.readline(); # skip header

    # assume only one genome id
    outdata = {}
    outdata['key'] = indata['key']
    outdata['value'] = "{}{}".format(indata['value'], indata['value'])
    data_list = []
    data_list.append({
        'type': 'MyService.PairString',
        'data': outdata,
        'name': args.outobj_id,
        'meta': {
            'org.series': args.inobj_id
        }
    })
    wsd.save_objects({'workspace': args.ws_id, 'objects': data_list})

    if (args.del_tmps == "true"):
        os.remove("{}-{}".format(os.getpid(), args.exp_fn))
        os.remove("{}-{}".format(os.getpid(), args.flt_out_fn))
Example #30
    def generate_cummerbund_plots(self, ctx, cummerbundParams):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN generate_cummerbund_plots

        params = cummerbundParams
        returnVal = params['ws_cummerbund_output']

        #Set up workspace client
        user_token = ctx['token']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name': params['ws_cuffdiff_id'],
            'workspace': params['workspace_name']
        }])

        # Check if workspace has data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = script_util2.extract_cuffdiff_data(
            self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        if (cuffdiff_dir is False):
            return returnVal

        # Run R script to generate cummerbund plots and update the output json file
        # Prepare output object.
        outputobject = dict()

        # Prepare output plot list
        cummerbundplotset = []

        # List of plots to generate
        plotlist = [{
            'file':
            "dispersionplot.R",
            'title':
            "Dispersion plot",
            'description':
            "Dispersion plot is the quality measure of the data. It estimates deviation from threshold against counts in FPKM."
        }, {
            'file':
            "fpkmscvplot.R",
            'title':
            "Genes CV plot",
            'description':
            "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data."
        }, {
            'file':
            "isoformscvplot.R",
            'title':
            "Isoform CV plot",
            'description':
            "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data.Differences in CV2 can result in lower numbers of differentially expressed isoforms due to a higher degree of variability between replicate fpkm estimates."
        }, {
            'file':
            "densityplot.R",
            'title':
            "Density plot",
            'description':
            "The density plot shows the distribution of FPKM scores across samples"
        }, {
            'file':
            "csdensityrepplot.R",
            'title':
            "Replicates density plot",
            'description':
            "The replicates density plot shows the distribution of FPKM scores across sample replicates"
        }, {
            'file':
            "boxplot.R",
            'title':
            "Box plots",
            'description':
            "The box plots show the FPKM distribution across samples."
        }, {
            'file':
            "boxrepplot.R",
            'title':
            "Box plots of replicates",
            'description':
            "The box plots of replicates show the FPKM distribution across sample replicates."
        }, {
            'file':
            "pairwisescatterplots.R",
            'title':
            "Pairwise scatter plots",
            'description':
            "The scatterplots show differences in gene expression between two samples. If two samples are identical, all genes will fall on the mid-line."
        }, {
            'file':
            "volcanomatrixplot.R",
            'title':
            "Volcano matrix plots",
            'description':
            "Volcano matrix plot is a scatter plot that also identifies differentially expressed genes (by color) between samples based on log2 fold change cut off."
        }, {
            'file':
            "pcaplot.R",
            'title':
            "PCA plot",
            'description':
            "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions."
        }, {
            'file':
            "pcarepplot.R",
            'title':
            "PCA plot including replicates",
            'description':
            "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions including replicates."
        }, {
            'file':
            "mdsplot.R",
            'title':
            "Multi-dimensional scaling plot",
            'description':
            "Multi-dimensional scaling plots are similar to PCA plots and useful for determining the major sources of variation in the dataset. "
        }, {
            'file':
            "mdsrepplot.R",
            'title':
            "Multi-dimensional scaling plot including replicates",
            'description':
            "Multi-dimensional scaling plot including replicates are  similar to PCA plots and useful for determining the major sources of variation in the dataset with replicates. These can be useful to determine any systematic bias that may be present between conditions."
        }]

        #TODO.. Giving Rplot.pdf
        #                { 'file': "dendrogramplot.R",
        #                  'title': "Dendrogram",
        #                  'description': "Dendrogram  based on the JS (Jensen-Shannon divergence) distance" },
        #
        #                { 'file': "dendrogramrepplot.R",
        #                  'title': "Dendrogram including replicates",
        #                  'description': "Dendrogram including replicates based on the JS (Jensen-Shannon divergence) distance" },

        # Iterate through the plotlist and generate the images and json files.
        for plot in plotlist:
            status = script_util2.rplotandupload(
                self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS, plot['file'],
                self.__SHOCK_URL, self.__HS_URL, user_token, cummerbundplotset,
                plot['title'], plot['description'], cuffdiff_dir)
            if status == False:
                self.__LOGGER.info(
                    "Problem generating image and json file - " + plot["file"])

        # Populate the output object
        outputobject['cummerbundplotSet'] = cummerbundplotset

        #TODO: Need to figure out how to get rnaseq experiment id
        outputobject['rnaseq_experiment_id'] = "rnaseq_experiment_id"
        outputobject['cuffdiff_input_id'] = params['ws_cuffdiff_id']

        res = ws_client.save_objects({
            "workspace":
            params['workspace_name'],
            "objects": [{
                "type": "KBaseRNASeq.cummerbund_output",
                "data": outputobject,
                "name": params["ws_cummerbund_output"]
            }]
        })

        #END generate_cummerbund_plots

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method generate_cummerbund_plots return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
Example #31
#!/usr/bin/env python
from biokbase.workspace.client import Workspace
import os, sys, json
Token = os.environ['KB_AUTH_TOKEN']
Workspace_URL = 'https://appdev.kbase.us/services/ws'
WSClient = Workspace(url=Workspace_URL, token=Token)
print(WSClient.ver())

with open('MSD_v1.0_Biochem.json', "r") as read_file:
    data = json.load(read_file)

results = WSClient.save_objects({
    'workspace':
    'kbase',
    'objects': [{
        'type': 'KBaseBiochem.Biochemistry',
        'data': data,
        'name': 'default'
    }]
})
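A quick read-back, using the same get_objects style as the other examples on this page, could confirm the upload; the expected values in the comments are assumptions based on the save call above.

# Fetch the object just saved and spot-check its info tuple.
saved = WSClient.get_objects([{'workspace': 'kbase', 'name': 'default'}])[0]
print(saved['info'][1])  # expected: 'default'
print(saved['info'][2])  # expected: a 'KBaseBiochem.Biochemistry-*' type string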
Example #32
def gl2networks (args) :
    ###
    # download ws object and convert them to csv
    wsd = Workspace(url=args.ws_url, token=os.environ.get('KB_AUTH_TOKEN'))
    raw_data = wsd.get_object({'id' : args.inobj_id,
                  'workspace' : args.ws_id})['data']

    gl = [ gr[2] for gr in raw_data['genes']]
    gl_str = "'" + "','".join(gl)+ "'"

    
    sql = "SELECT DISTINCT af1.to_link, af2.to_link, f1.source_id, f2.source_id, af1.strength, ig.from_link FROM IsGroupingOf ig, AssociationFeature af1, AssociationFeature af2, Feature f1, Feature f2 WHERE ig.to_link =  af1.from_link and af1.from_link = af2.from_link and (af1.to_link IN ({}) AND af2.to_link IN ({}) ) AND af1.to_link < af2.to_link AND f1.id = af1.to_link AND f2.id = af2.to_link".format(gl_str, gl_str)

    nc = Node()
    datasets = []

    try:
        con = mdb.connect(args.db_host, args.db_user, args.db_pass, args.db_name)
        cur = con.cursor()
        cur.execute(sql)

        edge = cur.fetchone()
        dsid = set()
        while (edge is not None):
            nc.add_edge(edge[4], edge[5], edge[0], 'GENE', edge[1], 'GENE', 0.0, edge[2], edge[3])
            dsid.add(edge[5])
            edge = cur.fetchone()

        ds_str = "'" + "','".join(dsid) + "'"
        cur.execute("SELECT id, association_type, data_source, description , df.to_link, sr.from_link FROM AssociationDataset, IsDatasetFor df, IsSourceForAssociationDataset sr WHERE id = df.from_link and id = sr.to_link and id IN({})".format(ds_str))
        ds = cur.fetchone()
        while( ds is not None):
            datasets.append ( { 
                'network_type' : ds[1],
                'taxons' : [ ds[4] ],
                'source_ref' : ds[5],
                'name' : ds[0],
                'id' : ds[0],
                'description' : ds[3],
                'properties' : {
                }
            })
            ds = cur.fetchone()

        # generate Networks object
        net_object = {
          'datasets' : datasets,
          'nodes' : nc.nodes,
          'edges' : nc.edges,
          'user_annotations' : {"genes" :",".join(gl) },
          'name' : 'GeneList Internal Network',
          'id' : args.outobj_id,
          'properties' : {
            'graphType' : 'edu.uci.ics.jung.graph.SparseMultigraph'
          }
        }
 
        # Store results object into workspace
        wsd.save_objects({'workspace' : args.ws_id, 'objects' : [{'type' : 'KBaseNetworks.Network', 'data' : net_object, 'name' : args.outobj_id, 'meta' : {'org_obj_id' : args.inobj_id, 'org_ws_id' : args.ws_id}}]})
        
    except mdb.Error, e:
        print "Error %d: %s" % (e.args[0],e.args[1])
        sys.exit(1)
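The Node helper used above is not included in this snippet. A minimal stand-in consistent with the add_edge call and the nodes/edges attributes might look like the following; the field names are guesses modeled on the KBaseNetworks.Network structure, not the original class:

class Node(object):
    # Minimal stand-in: collects unique nodes and the edges between them.
    def __init__(self):
        self.nodes = []
        self.edges = []
        self._seen = {}  # entity id -> node id

    def _node_id(self, entity_id, entity_type, name):
        if entity_id not in self._seen:
            node_id = 'node.{0}'.format(len(self.nodes))
            self._seen[entity_id] = node_id
            self.nodes.append({'id': node_id, 'name': name,
                               'entity_id': entity_id, 'type': entity_type,
                               'properties': {}, 'user_annotations': {}})
        return self._seen[entity_id]

    def add_edge(self, strength, dataset_id, id1, type1, id2, type2,
                 confidence, name1, name2):
        n1 = self._node_id(id1, type1, name1)
        n2 = self._node_id(id2, type2, name2)
        self.edges.append({'id': 'edge.{0}'.format(len(self.edges)),
                           'name': name1 + ':' + name2,
                           'node_id1': n1, 'node_id2': n2, 'directed': 'false',
                           'confidence': confidence, 'strength': float(strength),
                           'dataset_id': dataset_id,
                           'properties': {}, 'user_annotations': {}})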
Example #33
    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()

    kb_token = os.environ.get('KB_AUTH_TOKEN')

    ## main loop
    jif = open("{}/{}".format(args.sdir, args.otmp, 'r'))
    data = json.loads(jif.read())
    jif.close()

    wsd = Workspace(url=args.ws_url, token=kb_token)
    wsd.save_objects({
        'workspace':
        args.ws_id,
        'objects': [{
            'type': 'Transform.Pair',
            'data': data,
            'name': args.outobj_id,
            'meta': {
                'source_id': args.inobj_id,
                'source_type': args.etype,
                'ujs_job_id': args.jid
            }
        }]
    })

    exit(0)
Example #34
    def create_interactive_heatmap_de_genes(self, ctx, interactiveHeatmapParams):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_interactive_heatmap_de_genes
        fparams = interactiveHeatmapParams
        #returnVal = "ttt"
        #Set up workspace client
        user_token = ctx['token']
        workspace = fparams['workspace_name']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        system_params = {}
        system_params['token'] = user_token
        system_params['ws_url'] = self.__WS_URL
        system_params['logger'] = self.__LOGGER
        system_params['shock_url'] = self.__SHOCK_URL
        system_params['hs_url'] = self.__HS_URL
        system_params['scratch'] = self.__SCRATCH
        system_params['rscripts'] = self.__RSCRIPTS
        system_params['workspace'] = workspace

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : fparams['ws_cuffdiff_id'],
            'workspace' : fparams['workspace_name']
            }])

        # Check if workspace has data
        returnVal = fparams['ws_expression_matrix_id']
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal
        cuffdiff_dir = join (self.__SCRATCH , "cuffdiffData/cuffdiff")
        cuffdiff_dir = script_util2.extract_cuffdiff_data (self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)
        #cuffdiff_dir = "/kb/module/work/nnc/cuffdiff"
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)


        #if (cuffdiff_dir is False):
        #    return returnVal
        fparams['cuffdiff_dir'] = cuffdiff_dir
        fparams['infile'] = join (cuffdiff_dir, "gene_exp.diff")
        fparams['outfile'] = join(system_params['scratch'],  "gene_exp.diff.filter")

        filtered_matrix = script_util2.filter_expression_matrix(fparams, system_params)
        self.__LOGGER.info("matrix is " + filtered_matrix)

        fparams['infile'] = join (system_params['scratch'], "gene_exp.diff.filter")
        fparams['outfile'] = join(system_params['scratch'],  "gene_exp.diff.filter.genelist")



        genelist_filtered_matrix_file = script_util2.get_gene_list_from_filter_step(fparams)


        # Prepare output object.
        outjson = False
 

        rparams = {}
        rparams['genelist'] = filtered_matrix
        rparams['cuffdiff_dir'] = fparams['cuffdiff_dir']
        rparams['outpng'] = join (system_params['scratch'], "heatmap.png")
        rparams['imageheight'] = 1600
        rparams['imagewidth'] = 800
        rparams['plotscript'] = join(system_params['rscripts'], "heatmapplotinteractive.R")
        rparams['include_replicates'] = 1
        rparams['outmatrix'] = join (system_params['scratch'], "outmatrix")

        roptstr_basic_heatmap_rep = script_util2.get_command_line_heatmap_basic (rparams)

        # Run R script to generate the heatmap image and json file.
        # Prepare output object.
        outputobject = dict()

        # Prepare output plot list
        cummerbundplotset = []

        # List of plots to generate
        plotlist = [
            {'roptstr': roptstr_basic_heatmap_rep,
             'title': "Heatmap",
             'description': "Heatmap",
             'exp': fparams['ws_expression_matrix_id']
             }
        ]
        fparams['cummerbundplotset'] = cummerbundplotset
        # Iterate through the plotlist and generate the images and json files.
        for plot in plotlist:
            fparams['title'] = plot['title']
            fparams['description'] = plot['description']


            status = script_util2.rplotanduploadinteractive(system_params, fparams, rparams, plot['roptstr'])
            if status == False:
                self.__LOGGER.info("Problem generating image and json file - " + plot["roptstr"])
            else:
                self.__LOGGER.info(status)
                outjson = status
                with open("{0}/{1}".format(self.__SCRATCH, outjson), 'r') as et2:
                    eo2 = json.load(et2)
                    genome_ref = s_res[0]['data']['genome_id']
                    eo2['type'] = 'untransformed'
                    #eo2['genome_ref'] = genome_ref
                    self.__LOGGER.info(workspace + self.__SCRATCH + outjson + plot['exp'])
                    ws_client.save_objects({'workspace': workspace,
                                            'objects': [{'type': 'KBaseFeatureValues.ExpressionMatrix',
                                                         'data': eo2,
                                                         'name': plot['exp']
                                                         }]})

        returnVal = fparams['ws_expression_matrix_id']

        #END create_interactive_heatmap_de_genes

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method create_interactive_heatmap_de_genes return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
Example No. 35
from biokbase.workspace.client import Workspace
ws_client = Workspace()
ws_next_client = Workspace(url='https://next.kbase.us/services/ws')
a, b = ws_next_client.get_objects([{'objid': 4, 'wsid': 68}, {'objid': 5, 'wsid': 68}])[0:2]
a_params = {'type': a['info'][2], 'data': a['data'], 'name': a['info'][1]}
b_params = {'type': b['info'][2], 'data': b['data'], 'name': b['info'][1]}
ws_client.save_objects({'id': 9145, 'objects': [a_params, b_params]})
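
# Note: each object returned by get_objects carries an 'info' tuple in the standard
# KBase object_info layout, where info[1] is the object name and info[2] the type
# string; save_objects requires a 'name' (or 'objid') per object, hence the names
# carried over above.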

    
    kb_token = os.environ.get('KB_AUTH_TOKEN')
    wsd = Workspace(url=args.ws_url, token=kb_token)

    gids = [ re.sub(r"_ContigSet.jsonp$","", f) for f in listdir(".") if f.endswith("_ContigSet.jsonp")]


    ## main loop
    for gid in gids:
      # store contigset first
      jif = open("{}/{}_ContigSet.jsonp".format(".",gid, 'r'))
      data = json.loads(jif.read())
      jif.close()
      
      wsd.save_objects({'workspace':args.ws_id, 'objects' : [ {
        'type' : 'KBaseGenomes.ContigSet', 'data' : data, 'name' : "{}-{}_cs".format(args.outobj_id, gid), 
        'meta' : { 'source_id' : args.inobj_id, 'source_type' : args.etype,
                   'ujs_job_id' : args.jid} } ]})
      
      jif = open("{}/{}.jsonp".format(".",gid, 'r'))
      data = json.loads(jif.read())
      jif.close()
      
      data['contigset_ref'] = "{}/{}-{}_cs".format(args.ws_id,args.outobj_id, gid)


      wsd.save_objects({'workspace':args.ws_id, 'objects' : [ {
        'type' : 'KBaseGenomes.Genome', 'data' : data, 'name' : "{}-{}_gn".format(args.outobj_id, gid), 
        'meta' : { 'source_id' : args.inobj_id, 'source_type' : args.etype,
                   'ujs_job_id' : args.jid} } ]})

    exit(0)
Example No. 37
def run_filter_genes(workspace_service_url=None,
                     param_file=None,
                     level=logging.INFO,
                     logger=None):
    """
    Narrative Job Wrapper script to execute coex_filter
    
    Args:
        workspace_service_url: A url for the KBase Workspace service
        param_file: Path to a json parameter file (contains workspace_name,
            object_name, and the filtering options)
        level: Logging level, defaults to logging.INFO.
        logger: Optional logger; a stderr logger is created if not given.
    
    Returns:
        Output is written back in WS
    
    Authors:
        Shinjae Yoo
    
    """

    try:
        os.makedirs(RAWEXPR_DIR)
    except OSError:
        pass  # directory already exists
    try:
        os.makedirs(FLTRD_DIR)
    except OSError:
        pass
    try:
        os.makedirs(FINAL_DIR)
    except OSError:
        pass

    if logger is None:
        logger = script_utils.stderrlogger(__file__)

    logger.info(
        "Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV")
    token = os.environ.get("KB_AUTH_TOKEN")

    with open(param_file) as paramh:
        param = json.load(paramh)

    from biokbase.workspace.client import Workspace
    ws = Workspace(url=workspace_service_url, token=token)
    expr = ws.get_objects([{
        'workspace': param['workspace_name'],
        'name': param['object_name']
    }])[0]['data']

    cmd_download_cvt_tsv = [
        FVE_2_TSV, '--workspace_service_url', workspace_service_url,
        '--workspace_name', param['workspace_name'], '--object_name',
        param['object_name'], '--working_directory', RAWEXPR_DIR,
        '--output_file_name', EXPRESS_FN
    ]

    # need shell in this case because the java code is depending on finding the KBase token in the environment
    #  -- copied from FVE_2_TSV
    tool_process = subprocess.Popen(" ".join(cmd_dowload_cvt_tsv),
                                    stderr=subprocess.PIPE,
                                    shell=True)
    stdout, stderr = tool_process.communicate()

    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        logger.info(stderr)

    logger.info("Identifying differentially expressed genes")

    ## Prepare sample file
    # detect num of columns
    with open("{0}/{1}".format(RAWEXPR_DIR, EXPRESS_FN), 'r') as f:
        fl = f.readline()
    ncol = len(fl.split('\t'))

    # force ANOVA if there are only two samples (one gene-id column plus two data columns)
    if ncol == 3: param['method'] = 'anova'

    with open("{0}/{1}".format(RAWEXPR_DIR, SAMPLE_FN), 'wt') as s:
        s.write("0")
        for j in range(1, ncol - 1):
            s.write("\t{0}".format(j))
        s.write("\n")

    ## Run coex_filter
    cmd_coex_filter = [
        COEX_FILTER, '-i', "{0}/{1}".format(RAWEXPR_DIR, EXPRESS_FN), '-o',
        "{0}/{1}".format(FLTRD_DIR, FLTRD_FN), '-m', param['method'], '-s',
        "{0}/{1}".format(RAWEXPR_DIR, SAMPLE_FN), '-x',
        "{0}/{1}".format(RAWEXPR_DIR, GENELST_FN), '-t', 'y'
    ]
    if 'num_features' in param:
        cmd_coex_filter.append("-n")
        cmd_coex_filter.append(str(param['num_features']))

    if 'p_value' in param:
        cmd_coex_filter.append("-p")
        cmd_coex_filter.append(str(param['p_value']))

    if 'p_value' not in param and 'num_features' not in param:
        logger.error("One of p_value or num_features must be defined")
        return empty_results("One of p_value or num_features must be defined",
                             expr, workspace_service_url, param, logger, ws)
        #sys.exit(2) #TODO: No error handling in narrative so we do graceful termination

    #if 'p_value' in param and 'num_features' in param:
    #  logger.error("Both of p_value and num_features cannot be defined together");
    #  sys.exit(3)

    tool_process = subprocess.Popen(cmd_coex_filter, stderr=subprocess.PIPE)
    stdout, stderr = tool_process.communicate()

    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        logger.info(stderr)

    ## Header correction
    try:
        with open("{0}/{1}".format(FLTRD_DIR, FLTRD_FN), 'r') as ff:
            fe = ff.readlines()
        with open("{0}/{1}".format(FLTRD_DIR, FLTRD_FN), 'w') as ff:
            ff.write(
                fl
            )  # use original first line that has correct header information
            fe.pop(0)
            ff.writelines(fe)
    except IOError:
        logger.error("Output was not found")
        return empty_results("Increase p_value or specify num_features", expr,
                             workspace_service_url, param, logger, ws)

    ## checking genelist
    with open("{0}/{1}".format(RAWEXPR_DIR, GENELST_FN), 'r') as glh:
        gl = glh.readlines()
    gl = [x.strip('\n') for x in gl]

    if (len(gl) < 1):
        logger.error("No genes are selected")
        return empty_results("Increase p_value or specify num_features", expr,
                             workspace_service_url, param, logger, ws)
        #sys.exit(4)

    ## Upload FVE
    # change workspace to be the referenced object's workspace_name because it may not be in the same working ws due to referencing
    # Updates: change missing genome handling strategy by copying reference to working workspace
    cmd_upload_expr = [
        TSV_2_FVE, '--workspace_service_url', workspace_service_url,
        '--object_name', param['out_expr_object_name'], '--working_directory',
        FINAL_DIR, '--input_directory', FLTRD_DIR, '--output_file_name',
        FINAL_FN
    ]
    tmp_ws = param['workspace_name']
    if 'genome_ref' in expr:
        obj_infos = ws.get_object_info_new(
            {"objects": [{
                'ref': expr['genome_ref']
            }]})[0]

        if len(obj_infos) < 1:
            logger.error("Couldn't find {0} from the workspace".format(
                expr['genome_ref']))
            raise Exception("Couldn't find {0} from the workspace".format(
                expr['genome_ref']))

        #tmp_ws = "{0}".format(obj_infos[7])
        logger.info("{0} => {1} / {2}".format(expr['genome_ref'], obj_infos[7],
                                              obj_infos[1]))
        if obj_infos[7] != param['workspace_name']:
            #we need to copy it from the other workspace
            try:
                logger.info(
                    "trying to copy the referenced genome object : {0}".format(
                        expr['genome_ref']))
                ws.copy_object({
                    'from': {
                        'ref': expr['genome_ref']
                    },
                    'to': {
                        'workspace': param['workspace_name'],
                        'name': obj_infos[1]
                    }
                })
                # add genome_object_name only after successful copy
                cmd_upload_expr.append('--genome_object_name')
                cmd_upload_expr.append(obj_infos[1])
            except Exception:
                # no permission or any issues... then, give up providing genome reference
                logger.info("".join(traceback.format_exc()))
                pass
        else:
            # it is local... we can simply add reference without copying genome
            cmd_upload_expr.append('--genome_object_name')
            cmd_upload_expr.append(obj_infos[1])

    # updated ws name
    cmd_upload_expr.append('--workspace_name')
    cmd_upload_expr.append(tmp_ws)

    logger.info(" ".join(cmd_upload_expr))

    tool_process = subprocess.Popen(" ".join(cmd_upload_expr),
                                    stderr=subprocess.PIPE,
                                    shell=True)
    stdout, stderr = tool_process.communicate()

    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        logger.info(stderr)

    with open("{0}/{1}".format(FINAL_DIR, FINAL_FN), 'r') as et:
        eo = json.load(et)

    if 'description' not in expr:
        expr['description'] = "Filtered Expression Matrix"
    expr['description'] += " : Filtered by '{1}' method ".format(
        expr['description'], param['method'])

    if 'feature_mapping' in expr and 'feature_mapping' in eo:
        expr['feature_mapping'] = eo['feature_mapping']
    expr['data'] = eo['data']

    ws.save_objects({
        'workspace':
        param['workspace_name'],
        'objects': [{
            'type': 'KBaseFeatureValues.ExpressionMatrix',
            'data': expr,
            'name': (param['out_expr_object_name'])
        }]
    })

    ## Upload FeatureSet
    fs = {'elements': {}}
    fs['description'] = "FeatureSet identified by filtering method '{0}' ".format(
        param['method'])

    fs['description'] += "from {0}/{1}".format(param['workspace_name'],
                                               param['object_name'])

    for g in gl:
        if 'genome_ref' in expr:
            fs['elements'][g] = [expr['genome_ref']]
        else:
            fs['elements'][g] = []
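
    # FeatureSet.elements maps each selected feature id to a list of genome
    # references (empty when the matrix has no associated genome).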

    ws.save_objects({
        'workspace':
        param['workspace_name'],
        'objects': [{
            'type': 'KBaseCollections.FeatureSet',
            'data': fs,
            'name': (param['out_fs_object_name'])
        }]
    })
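
# A minimal sketch of the empty_results helper referenced above -- hypothetical,
# since the real implementation lives elsewhere in the original module. It logs
# the message and writes the unfiltered matrix back with an explanatory
# description so the caller still receives an object.
def empty_results(message, expr, workspace_service_url, param, logger, ws):
    logger.error(message)
    expr['description'] = "{0} : {1}".format(
        expr.get('description', "Expression Matrix"), message)
    ws.save_objects({'workspace': param['workspace_name'],
                     'objects': [{'type': 'KBaseFeatureValues.ExpressionMatrix',
                                  'data': expr,
                                  'name': param['out_expr_object_name']}]})
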
Example No. 38
            reagent['compartment_ref'] = '~/compartments/id/' + cpd[
                'compartmentId']
            reagent['coefficient'] = cpd['stoich']
            reagent['isCofactor'] = 0  # @todo Is this set separately from value in compound?
            rxn['reagents'].append(reagent)
        del rxn['equation']  # Remove after converting to reagent format
        biochem['reactions'].append(rxn)

    # Add the compartments from the compartments file.  Required fields: id, name,
    # and hierarchy.
    compartments = helper.readCompartmentsFile(args.compartmentfile,
                                               includeLinenum=False)

    biochem['compartments'].extend(compartments)

    # Save the Biochemistry object to the specified workspace.
    wsClient = Workspace(args.wsurl)
    objectSaveData = dict()
    objectSaveData['type'] = 'KBaseBiochem.Biochemistry-4.0'
    objectSaveData['name'] = args.id
    objectSaveData['data'] = biochem
    #    objectSaveData['meta'] = objectMetaData
    #    objectSaveData['provenance'] = [ objectProvData ]
    objectInfo = wsClient.save_objects({
        'workspace': args.workspace,
        'objects': [objectSaveData]
    })

    exit(0)
Example No. 39
    def view_heatmap(self, ctx, args):
        # ctx is the context object
        # return variables are: result
        #BEGIN view_heatmap
        try:
            os.makedirs(self.RAWEXPR_DIR)
        except OSError:
            pass  # directory already exists
        try:
            os.makedirs(self.FLTRD_DIR)
        except OSError:
            pass
        try:
            os.makedirs(self.FINAL_DIR)
        except OSError:
            pass
 
        if self.logger is None:
            self.logger = script_utils.stderrlogger(__file__)
        
        result = {}
        self.logger.info("Loading data")
        token = ctx['token']
 
        eenv = os.environ.copy()
        eenv['KB_AUTH_TOKEN'] = token

        param = args

        auth_client = _KBaseAuth(self.__AUTH_SERVICE_URL)
        user_id = auth_client.get_user(token)
        workspace_name_t = Template(param['workspace_name'])
        workspace_name = workspace_name_t.substitute(user_id=user_id)
 
 
        from biokbase.workspace.client import Workspace
        ws = Workspace(url=self.__WS_URL, token=token)
        fc = ws.get_objects([{'workspace': workspace_name, 'name' : param['object_name']}])[0]['data']
        if 'original_data' not in fc:
            raise Exception("FeatureCluster object does not have information for the original ExpressionMatrix")
        oexpr = ws.get_objects([{ 'ref' : fc['original_data']}])[0]

        df2 = pd.DataFrame(oexpr['data']['data']['values'], index=oexpr['data']['data']['row_ids'], columns=oexpr['data']['data']['col_ids'])

        # L2 normalization
        df3 = df2.div(df2.pow(2).sum(axis=1).pow(0.5), axis=0)
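
        # (dividing each row by its Euclidean norm makes the rows unit vectors, so
        # the row-by-row dot products used below act as cosine similarities)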

        # type: level, ratio, or log-ratio ("untransformed" is treated like level)
        # scale: raw, ln, log2, or log10
        self.logger.info("Expression matrix type: {0}, scale: {1}".format(oexpr['data']['type'],oexpr['data']['scale'] ))
        # Initialize fc_df with the default (raw level) fold-change computation;
        # the type/scale-specific branches below overwrite it. A small pseudo-count
        # (factor times the smallest nonzero magnitude) avoids log2(0).
        factor = 0.125
        fc_df = df2 + df2[df2 != 0].abs().min().min() * factor
        if param['control_condition'] in fc_df.columns:
            fc_df = (fc_df.div(fc_df.loc[:, param['control_condition']], axis=0)).apply(np.log2)
        else:
            fc_df = (fc_df.div(fc_df.loc[:, fc_df.columns[0]], axis=0)).apply(np.log2)
        if oexpr['data']['type'] == 'level' or oexpr['data']['type'] == 'untransformed': # need to compute fold changes
            if 'scale' not in oexpr['data'] or oexpr['data']['scale'] == 'raw' or oexpr['data']['scale'] == "1.0":
              factor = 0.125
              fc_df = df2 + df2[df2 !=0].abs().min().min() * factor
              if param['control_condition']  in fc_df.columns:
                  fc_df = (fc_df.div(fc_df.loc[:, param['control_condition']], axis=0)).apply(np.log2)
              else:
                  fc_df = (fc_df.div(fc_df.loc[:,fc_df.columns[0]], axis=0)).apply(np.log2)
            else:
              fc_df = df2
              if param['control_condition']  in fc_df.columns:
                  fc_df = (fc_df.div(fc_df.loc[:, param['control_condition']], axis=0))
              else:
                  fc_df = (fc_df.div(fc_df.loc[:,fc_df.columns[0]], axis=0))
              if oexpr['data']['scale'] == "log10":
                  fc_df = fc_df/np.log10(2)
              elif oexpr['data']['scale'] == "ln":
                  fc_df = fc_df/np.log(2)
              else:
                  pass
        elif oexpr['data']['type'] == 'ratio':
            fc_df = df2.apply(np.log2)
        elif oexpr['data']['type'] == 'log-ratio':
            fc_df = df2
            if oexpr['data']['scale'] == "log10":
                fc_df = fc_df/np.log10(2)
            elif oexpr['data']['scale'] == "ln":
                fc_df = fc_df/np.log(2)
            else:
                pass

        else: # do the same thing with simple level or untransformed
            if 'scale' not in oexpr['data'] or oexpr['data']['scale'] == 'raw' or oexpr['data']['scale'] == "1.0":
              factor = 0.125
              fc_df = df2 + df2[df2 !=0].abs().min().min() * factor
              if param['control_condition']  in fc_df.columns:
                  fc_df = (fc_df.div(fc_df.loc[:, param['control_condition']], axis=0)).apply(np.log2)
              else:
                  fc_df = (fc_df.div(fc_df.loc[:,fc_df.columns[0]], axis=0)).apply(np.log2)
            else:
              fc_df = df2
              if param['control_condition']  in fc_df.columns:
                  fc_df = (fc_df.div(fc_df.loc[:, param['control_condition']], axis=0))
              else:
                  fc_df = (fc_df.div(fc_df.loc[:,fc_df.columns[0]], axis=0))
              if oexpr['data']['scale'] == "log10":
                  fc_df = fc_df/np.log10(2)
              elif oexpr['data']['scale'] == "ln":
                  fc_df = fc_df/np.log(2)
              else:
                  pass
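
        # Scale-conversion check: a log10 value r equals r / log10(2) in log2 units,
        # since log2(x) = log10(x) / log10(2); e.g. log10(8) ~ 0.903 and
        # 0.903 / 0.301 ~ 3 = log2(8). The ln case is analogous with r / ln(2).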
       
        self.logger.info("Compute cluster statistics")

        cl = {}
        afs = []
        cid = 1

        c_stat = pd.DataFrame()
        for cluster in fc['feature_clusters']:
         
          try:
            fs = cluster['id_to_pos'].keys()
          except KeyError:
            continue  # cluster has no id_to_pos mapping (couldn't find feature_set)

          fsn = "Cluster_{0}".format(cid)
          cid +=1
          c_stat.loc[fsn,'size'] = len(fs)
          if 'meancor' in cluster:
              c_stat.loc[fsn,'mcor'] = cluster['meancor']
          else:
            pass
            # TODO: Add mean cor calculation later
            #raise Exception("Mean correlation is not included in FeatureCluster object") # now it is NaN

          if 'quantile' in param:
              # clamp quantile to the [0, 1] range
              qt = float(param['quantile'])
              if qt > 1.0: qt = 1.0
              if qt < 0.0: qt = 0.0
              c_stat.loc[fsn,'stdstat'] = fc_df.loc[fs,].std(axis=1).quantile(qt)
          else:
              c_stat.loc[fsn,'stdstat'] = fc_df.loc[fs,].std(axis=1).quantile(0.75)
         

          if df3.loc[fs,].shape[0] < 1: # empty cluster
            continue
          cl[fsn] = fs
          #afs.extend(fs)

          #c1 = df3.loc[fs,].sum(axis=0)
          #c1 = c1 / np.sqrt(c1.pow(2).sum())
          #if(len(cl.keys()) == 1):
          #  centroids = c1.to_frame(fsn).T
          #else:
          #  centroids.loc[fsn] = c1

        # now we have centroids and statistics
        # let's subselect clusters
        min_features = 200
        if 'min_features' in param :
          min_features = param['min_features']
        
        c_stat.loc[:,'nmcor'] = c_stat.loc[:,'mcor'] / c_stat.loc[:,'mcor'].max()
        c_stat.loc[:,'nstdstat'] = c_stat.loc[:,'stdstat'] / c_stat.loc[:,'stdstat'].max()
        
        if 'use_norm_weight' in param and param['use_norm_weight'] != 0:
            if 'quantile_weight' in param:
                c_stat.loc[:,'weight'] = c_stat.loc[:,'nmcor'] + float(param['quantile_weight']) * c_stat.loc[:,'nstdstat']
            else:
                c_stat.loc[:,'weight'] = c_stat.loc[:,'nmcor'] + 1.0                             * c_stat.loc[:,'nstdstat']
        else:
            if 'quantile_weight' in param:
                c_stat.loc[:,'weight'] = c_stat.loc[:,'mcor'] + float(param['quantile_weight']) * c_stat.loc[:,'stdstat']
            else:
                c_stat.loc[:,'weight'] = c_stat.loc[:,'mcor'] + 0.1                             * c_stat.loc[:,'stdstat']

        c_stat.sort_values('weight', inplace=True, ascending=False)

        pprint(c_stat)

        centroids = pd.DataFrame()
        for i in range(c_stat.shape[0]):
            fsn = c_stat.index[i]
            fs = cl[fsn]
            if i != 0 and len(afs) + len(fs) > min_features:
                break
           
            afs.extend(fs)

            c1 = df3.loc[fs,].sum(axis=0)
            c1 = c1 / np.sqrt(c1.pow(2).sum())
            if(centroids.shape[0] < 1):
              centroids = c1.to_frame(fsn).T
            else:
              centroids.loc[fsn] = c1
           
        pprint(centroids)
        
        if len(cl.keys()) == 0:
            raise Exception("No feature ids were mapped to dataset or no clusters were selected")
        
        # dataset centroid
        dc = df3.loc[afs,].sum(axis=0)
        dc = dc / np.sqrt(dc.pow(2).sum())
    
        
        self.logger.info("Ordering Centroids and Data")
        # the cluster centroid farthest from the dataset centroid (note: this reuses
        # the name fc; the FeatureCluster data is no longer needed at this point)
        fc = (centroids * dc).sum(axis=1).idxmin()
        # the centroid farthest from fc
        ffc = (centroids * centroids.loc[fc,]).sum(axis=1).idxmin()
        
        # major direction to order on unit ball space
        md = centroids.loc[ffc,] - centroids.loc[fc,]
        
        # unnormalized projection onto the major direction (the constant |md| factor
        # is omitted since it is the same for every point)
        corder = (centroids * md).sum(axis=1).sort_values() # cluster order
        coidx = corder.index
        
        dorder =(df3.loc[afs,] * md).sum(axis=1).sort_values() # data order
        
        # get first fs table    
        fig_properties = {"xlabel" : "Conditions", "ylabel" : "Features", "xlog_mode" : "none", "ylog_mode" : "none", "title" : "Log Fold Changes", "plot_type" : "heatmap", 'ygroup': []}
        fig_properties['ygtick_labels'] = coidx.tolist()
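
        # fig_properties['ygroup'] collects the number of rows each cluster
        # contributes, in display order, presumably so the renderer can delimit
        # cluster bands along the heatmap's y-axis.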

        if 'fold_change' in param and param['fold_change'] == 1:
            frange = 2
            if 'fold_change_range' in param:
                frange = float(param['fold_change_range'])
            final=fc_df.loc[dorder.loc[cl[coidx[0]],].index,]
            fig_properties['ygroup'].append(final.shape[0])
            
            for i in range(1,len(coidx)):
                tf = fc_df.loc[dorder.loc[cl[coidx[i]],].index,]
                fig_properties['ygroup'].append(tf.shape[0])
                final = final.append(tf)

            if 'fold_cutoff' in param and param['fold_cutoff'] == 1:
                final[final > frange] = frange
                final[final < - frange] = - frange
            else:
                fc_df0b = final.sub(final.min(axis=1), axis=0)
                final = (fc_df0b.div(fc_df0b.max(axis=1), axis=0) - 0.5) * 2 * frange
        else:
            final=df2.loc[dorder.loc[cl[coidx[0]],].index,]
            fig_properties['ygroup'].append(final.shape[0])
            
            for i in range(1,len(coidx)):
                tf = df2.loc[dorder.loc[cl[coidx[i]],].index,]
                fig_properties['ygroup'].append(tf.shape[0])
                final = final.append(tf)
        
        ## build the FloatDataTable (FDT) payload
        fdt = {'row_labels': [], 'column_labels': [], 'data': [[]]}
        #fdt = OrderedDict(fdt)
        # Nan to None
        final = final.where(pd.notnull(final),None)
        fdt['data'] = final.T.as_matrix().tolist()  # transpose so rows correspond to conditions
        fdt['row_labels'] = final.columns.tolist()
        fdt['column_labels'] = final.index.tolist()
        # TODO: Add group label later
        fdt['id'] = "{0}.fdt".format(param['out_figure_object_name'])
 
        self.logger.info("Saving the results")
        sstatus = ws.save_objects({'workspace' : workspace_name, 'objects' : [{'type' : 'MAK.FloatDataTable',
                                                                              'data' : fdt,
                                                                              'hidden':1, 
                                                                              'name' : "{0}.fdt".format(param['out_figure_object_name'])}]})

        data_ref = "{0}/{1}/{2}".format(sstatus[0][6], sstatus[0][0], sstatus[0][4])
        fig_properties['data_ref'] = data_ref

        sstatus = ws.save_objects({'workspace' : workspace_name, 'objects' : [{'type' : 'CoExpression.FigureProperties',
                                                                              'data' : fig_properties,
                                                                              #'hidden':1, 
                                                                              'name' : "{0}".format(param['out_figure_object_name'])}]})
                                                                              #'name' : "{0}.fp".format(param['out_figure_object_name'])}]})

        #mchp = {}
        #mchp['figure_obj'] = "{0}/{1}/{2}".format(sstatus[0][6], sstatus[0][0], sstatus[0][4])
        #sstatus = ws.save_objects({'workspace' : workspace_name, 'objects' : [{'type' : 'CoExpression.MulticlusterHeatmapPlot',
        #                                                                      'data' : mchp,
        #                                                                      'name' : (param['out_figure_object_name'])}]})

        result = fig_properties
        #END view_heatmap

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method view_heatmap return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
Example No. 40
    def filter_BlastOutput(self, ctx, params):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN filter_BlastOutput
        user_token=ctx['token']
        ws_client=Workspace(url=self.__WS_URL, token=user_token)
        blast_outputs=ws_client.get_objects([{'name':params['in_id'], 
                                              'workspace': params['ws_id']}])

            

        fs ={'elements': {}}
        fs['description'] = "FeatureSet from BlastOutput by "
        printedEvalue = False
        printedEntries = False
        if 'evalue' in params and params['evalue'] != "":
            fs['description'] += " E-value:{0}".format(params['evalue'])
            printedEvalue = True
        if 'entries' in params and (params['entries'] != "" or params['entries'] > 0):
            if(printedEvalue): fs['description'] += ","
            fs['description'] += " # of entries :{0}".format(params['entries'])
            printedEntries = True
        if not printedEvalue and not printedEntries:
            fs['description'] += "no filtering"
        
        if len(blast_outputs) != 1:
            fs['description'] = "No such blast output object was found : {0}/{1}".format(param['workspace_name'], param['object_name'])
        else:
            fm = {}
            f2g = {}
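            # (assumption) Hit_def is formatted as "<feature_id>#<genome_id>#...";
            # the first '#' ends the feature id and the second ends the genome id.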
            for boid in blast_outputs[0]['data']['BlastOutput_iterations']['Iteration']:
                for hitd in boid['Iteration_hits']['Hit']:
                    print hitd['Hit_def']
                    ali = hitd['Hit_def'].find('#')
                    if ali < 0: continue
                    fid = hitd['Hit_def'][0:ali]
                    gri = hitd['Hit_def'].find('#', ali+1)
                    if fid not in f2g: f2g[fid] = {}
                    if (gri >=  0 and not gri == (ali+1)): 
                        grid = hitd['Hit_def'][(ali+1):gri]
                        f2g[fid][grid] = 1
                    for hspd in hitd['Hit_hsps']['Hsp']:
                        if fid in fm:
                            if float(hspd['Hsp_evalue']) < fm[fid]:
                                fm[fid] = float(hspd['Hsp_evalue'])
                        else: fm[fid] = float(hspd['Hsp_evalue'])
           
            fms = sorted(fm.items(), key=lambda x: x[1], reverse=False)
            bol = len(fms)
            if params['entries'] != "" or int(params['entries']) > 0:
                if(int(params['entries']) < bol):
                    bol = int(params['entries'])
            for i in range(bol):
                if params['evalue'] != "" and fms[i][1] > float(params['evalue']): break
                if fms[i][0] in f2g:
                    fs['elements'][fms[i][0]] = f2g[fms[i][0]].keys()
                else:
                    fs['elements'][fms[i][0]] = []

        ws_client.save_objects(
            {"workspace":params['ws_id'],
            "objects": [{
                "type":"KBaseCollections.FeatureSet",
                "data":fs,
                "name":params['out_id']}
            ]})

        #pprint(fs)
        returnVal = {'obj_name' : params['out_id'], 'ws_id' : params['ws_id']}

        #END filter_BlastOutput

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method filter_BlastOutput return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
Example No. 41
    def create_interactive_heatmap_de_genes_old(self, ctx, heatmapParams):
        """
        :param heatmapParams: instance of type "heatmapParams" -> structure:
           parameter "sample1" of String, parameter "sample2" of String,
           parameter "q_value_cutoff" of Double, parameter
           "log2_fold_change_cutoff" of Double, parameter "num_genes" of
           Long, parameter "ws_cuffdiff_id" of type "ws_cuffdiff_id" (@id ws
           KBaseRNASeq.RNASeqCuffdiffdifferentialExpression), parameter
           "ws_expression_matrix_id1" of type "ws_expression_matrix_id" (@id
           ws KBaseFeatureValues.ExpressionMatrix), parameter
           "ws_expression_matrix_id2" of type "ws_expression_matrix_id" (@id
           ws KBaseFeatureValues.ExpressionMatrix), parameter
           "ws_cummerbund_output" of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output)
        :returns: instance of type "ResultsToReport" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        # BEGIN create_interactive_heatmap_de_genes_old
        fparams = heatmapParams
        # returnVal = "ttt"
        # Set up workspace client
        user_token = ctx["token"]
        workspace = fparams["workspace_name"]
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        system_params = {}
        system_params["token"] = user_token
        system_params["ws_url"] = self.__WS_URL
        system_params["logger"] = self.__LOGGER
        system_params["shock_url"] = self.__SHOCK_URL
        system_params["hs_url"] = self.__HS_URL
        system_params["scratch"] = self.__SCRATCH
        system_params["rscripts"] = self.__RSCRIPTS
        system_params["workspace"] = workspace

        # Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{"name": fparams["ws_cuffdiff_id"], "workspace": fparams["workspace_name"]}])

        # Check if workspace has data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            raise ValueError("Workspace did not return any objects")
        cuffdiff_dir = script_util2.extract_cuffdiff_data(
            self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token
        )
        # cuffdiff_dir = "/kb/module/work/nnc/cuffdiff"
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # if (cuffdiff_dir is False):
        #    return returnVal
        fparams["cuffdiff_dir"] = cuffdiff_dir
        fparams["infile"] = join(cuffdiff_dir, "gene_exp.diff")
        fparams["outfile"] = join(system_params["scratch"], "gene_exp.diff.filter")

        fparams["pairs"] = 1
        fparams["logModetmp"] = 2

        rparams = {}

        rparams["cuffdiff_dir"] = fparams["cuffdiff_dir"]
        rparams["outpng"] = join(system_params["scratch"], "heatmap.png")
        rparams["imageheight"] = 1600
        rparams["imagewidth"] = 800
        rparams["plotscript"] = join(system_params["rscripts"], "heatmapplotinteractive.R")
        rparams["include_replicates"] = 1
        rparams["pairs"] = fparams["pairs"]
        rparams["logMode"] = fparams["logModetmp"]
        rparams["removezeroes"] = 1
        rparams["outmatrix"] = join(system_params["scratch"], "outmatrix")
        reportObj = {}

        provenance = [{}]
        if "provenance" in ctx:
            provenance = ctx["provenance"]
        # add additional info to provenance here, in this case the input data object reference
        provenance[0]["input_ws_objects"] = [workspace + "/" + fparams["ws_cuffdiff_id"]]

        report = ""
        if fparams["pairs"] != 0:

            try:
                filtered_matrix = script_util2.filter_expression_matrix(fparams, system_params)
                self.__LOGGER.info("matrix is " + filtered_matrix)
                fparams["infile"] = join(system_params["scratch"], "gene_exp.diff.filter")
                fparams["outfile"] = join(system_params["scratch"], "gene_exp.diff.filter.genelist")
                genelist_filtered_matrix_file = script_util2.get_gene_list_from_filter_step(fparams)
                rparams["genelist"] = filtered_matrix
            except Exception:
                report += "There was an error in creating the expression matrix. "
                report += "No differentially expressed genes were found. "
                report += "Please change or double-check your filtering criteria."

                reportObj = {"objects_created": [], "text_message": report}

                reportName = "create_interactive_heatmap_de_genes_old_" + str(hex(uuid.getnode()))
                report_info = ws_client.save_objects(
                    {
                        "workspace": fparams["workspace_name"],
                        "objects": [
                            {
                                "type": "KBaseReport.Report",
                                "data": reportObj,
                                "name": reportName,
                                "meta": {},
                                "hidden": 1,  # important!  make sure the report is hidden
                                "provenance": provenance,
                            }
                        ],
                    }
                )[0]
                print ("saved Report: " + pformat(report_info))

                returnVal = {
                    "report_name": reportName,
                    "report_ref": str(report_info[6]) + "/" + str(report_info[0]) + "/" + str(report_info[4]),
                }

                return [returnVal]

        try:
            # Prepare output object.
            outjson = False

            roptstr_basic_heatmap_rep = script_util2.get_command_line_heatmap_basic(rparams)

            # Run R script to run cummerbund json and update the cummerbund output json file
            # Prepare output object.
            outputobject = dict()

            # Prepare output plot list
            cummerbundplotset = []

            # List of plots to generate
            plotlist = [
                {
                    "roptstr": roptstr_basic_heatmap_rep,
                    "title": "Heatmap",
                    "description": "Heatmap",
                    "exp": fparams["ws_expression_matrix_id"],
                }
            ]
            fparams["cummerbundplotset"] = cummerbundplotset
            # Iterate through the plotlist and generate the images and json files.
            for plot in plotlist:
                fparams["title"] = plot["title"]
                fparams["description"] = plot["description"]

                status = script_util2.rplotanduploadinteractive(system_params, fparams, rparams, plot["roptstr"])
                if status == False:
                    self.__LOGGER.info("Problem generating image and json file - " + plot["roptstr"])
                else:

                    self.__LOGGER.info(status)

                    outjson = status
                    self.__LOGGER.info("xxxxxx1")
                    with open("{0}/{1}".format(self.__SCRATCH, outjson), "r") as et2:

                        eo2 = json.load(et2)
                        genome_ref = s_res[0]["data"]["genome_id"]
                        eo2["type"] = "log2_level"
                        eo2["genome_ref"] = genome_ref
                        self.__LOGGER.info("xxxxxx2")
                        self.__LOGGER.info(workspace + self.__SCRATCH + outjson + plot["exp"])
                        res = ws_client.save_objects(
                            {
                                "workspace": workspace,
                                "objects": [
                                    {"type": "KBaseFeatureValues.ExpressionMatrix", "data": eo2, "name": plot["exp"]}
                                ],
                            }
                        )

                        info = res[0]
                        self.__LOGGER("done uploading exp")
                        report = "Successfully created expression matrix"
                        reportObj = {
                            "objects_created": [
                                {
                                    "ref": str(info[6]) + "/" + str(info[0]) + "/" + str(info[4]),
                                    "description": "Expression matrix",
                                }
                            ],
                            "text_message": report,
                        }

        except Exception:
            report += "There was an error in generating the expression matrix"
            reportObj = {"objects_created": [], "text_message": report}

        reportName = "create_interactive_heatmap_de_genes_" + str(hex(uuid.getnode()))
        report_info = ws_client.save_objects(
            {
                "workspace": fparams["workspace_name"],
                "objects": [
                    {
                        "type": "KBaseReport.Report",
                        "data": reportObj,
                        "name": reportName,
                        "meta": {},
                        "hidden": 1,  # important!  make sure the report is hidden
                        "provenance": provenance,
                    }
                ],
            }
        )[0]
        print ("saved Report: " + pformat(report_info))

        returnVal = {
            "report_name": reportName,
            "report_ref": str(report_info[6]) + "/" + str(report_info[0]) + "/" + str(report_info[4]),
        }

        # END create_interactive_heatmap_de_genes_old

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError(
                "Method create_interactive_heatmap_de_genes_old return value "
                + "returnVal is not type dict as required."
            )
        # return the results
        return [returnVal]
Example No. 42
    def filter_genes(self, ctx, args):
        # ctx is the context object
        # return variables are: result
        #BEGIN filter_genes
        try:
            os.makedirs(self.RAWEXPR_DIR)
        except OSError:
            pass  # directory already exists
        try:
            os.makedirs(self.FLTRD_DIR)
        except OSError:
            pass
        try:
            os.makedirs(self.FINAL_DIR)
        except OSError:
            pass
 
        if self.logger is None:
            self.logger = script_utils.stderrlogger(__file__)
        
        result = {}
        self.logger.info("Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV")
        token = ctx['token']
 
        eenv = os.environ.copy()
        eenv['KB_AUTH_TOKEN'] = token

        param = args
 
 
        from biokbase.workspace.client import Workspace
        ws = Workspace(url=self.__WS_URL, token=token)
        expr = ws.get_objects([{'workspace': param['workspace_name'], 'name' : param['object_name']}])[0]['data']
 
 
        cmd_download_cvt_tsv = [self.FVE_2_TSV, '--workspace_service_url', self.__WS_URL,
                                          '--workspace_name', param['workspace_name'],
                                          '--object_name', param['object_name'],
                                          '--working_directory', self.RAWEXPR_DIR,
                                          '--output_file_name', self.EXPRESS_FN
                              ]
 
        # need shell in this case because the java code is depending on finding the KBase token in the environment
        #  -- copied from FVE_2_TSV
        tool_process = subprocess.Popen(" ".join(cmd_dowload_cvt_tsv), stderr=subprocess.PIPE, shell=True, env=eenv)
        stdout, stderr = tool_process.communicate()
        
        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)
 
        if stderr is not None and len(stderr) > 0:
            self.logger.info(stderr)
 
        self.logger.info("Identifying differentially expressed genes")
 
        ## Prepare sample file
        # detect num of columns
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN), 'r') as f:
          fl = f.readline()
        ncol = len(fl.split('\t'))
        
        # force ANOVA if there are only two samples (one gene-id column plus two data columns)
        if ncol == 3: param['method'] = 'anova'
 
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN), 'wt') as s:
          s.write("0")
          for j in range(1,ncol-1):
            s.write("\t{0}".format(j))
          s.write("\n")
 
 
        ## Run coex_filter
        cmd_coex_filter = [self.COEX_FILTER, '-i', "{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN), '-o', "{0}/{1}".format(self.FLTRD_DIR, self.FLTRD_FN),
                           '-m', param['method'], '-s', "{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN),
                           '-x', "{0}/{1}".format(self.RAWEXPR_DIR, self.GENELST_FN), '-t', 'y']
        if 'num_features' in param:
          cmd_coex_filter.append("-n")
          cmd_coex_filter.append(str(param['num_features']))
 
        if 'p_value' in param:
          cmd_coex_filter.append("-p")
          cmd_coex_filter.append(str(param['p_value']))
 
        if 'p_value' not in param and 'num_features' not in param:
          self.logger.error("One of p_value or num_features must be defined");
          return empty_results("One of p_value or num_features must be defined", expr,self.__WS_URL, param, self.logger, ws)
          #sys.exit(2) #TODO: No error handling in narrative so we do graceful termination
 
        #if 'p_value' in param and 'num_features' in param:
        #  self.logger.error("Both of p_value and num_features cannot be defined together");
        #  sys.exit(3)
 
        tool_process = subprocess.Popen(cmd_coex_filter, stderr=subprocess.PIPE)
        stdout, stderr = tool_process.communicate()
        
        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)
 
        if stderr is not None and len(stderr) > 0:
            self.logger.info(stderr)
 
        ## Header correction
        try:
            with open("{0}/{1}".format(self.FLTRD_DIR, self.FLTRD_FN), 'r') as ff:
                fe = ff.readlines()
            with open("{0}/{1}".format(self.FLTRD_DIR, self.FLTRD_FN), 'w') as ff:
                ff.write(fl) # use original first line that has correct header information
                fe.pop(0)
                ff.writelines(fe)
        except IOError:
            self.logger.error("Output was not found")
            return empty_results("Increase p_value or specify num_features", expr,self.__WS_URL, param, self.logger, ws)
            
        
        ## checking genelist
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.GENELST_FN),'r') as glh:
          gl = glh.readlines()
        gl = [x.strip('\n') for x in gl]
 
        if len(gl) < 1:
          self.logger.error("No genes are selected")
          return empty_results("Increase p_value or specify num_features", expr,self.__WS_URL, param, self.logger, ws)
          #sys.exit(4)
 
        ## Upload FVE
        # change workspace to be the referenced object's workspace_name because it may not be in the same working ws due to referencing
        # Updates: change missing genome handling strategy by copying reference to working workspace
        cmd_upload_expr = [self.TSV_2_FVE, '--workspace_service_url', self.__WS_URL, 
                                          '--object_name', param['out_expr_object_name'],
                                          '--working_directory', self.FINAL_DIR,
                                          '--input_directory', self.FLTRD_DIR,
                                          '--output_file_name', self.FINAL_FN
                              ]
        tmp_ws = param['workspace_name']
        if 'genome_ref' in expr:
            obj_infos = ws.get_object_info_new({"objects": [{'ref':expr['genome_ref']}]})[0]
 
            if len(obj_infos) < 1:
                self.logger.error("Couldn't find {0} from the workspace".format(expr['genome_ref']))
                raise Exception("Couldn't find {0} from the workspace".format(expr['genome_ref']))
 
            #tmp_ws = "{0}".format(obj_infos[7])
            self.logger.info("{0} => {1} / {2}".format(expr['genome_ref'], obj_infos[7], obj_infos[1]))
            if obj_infos[7] != param['workspace_name']:
                #we need to copy it from the other workspace
                try:
                  self.logger.info("trying to copy the referenced genome object : {0}".format(expr['genome_ref']))
                  ws.copy_object({'from' : {'ref' : expr['genome_ref']},'to' : {'workspace': param['workspace_name'], 'name' : obj_infos[1]}})
                  # add genome_object_name only after successful copy
                  cmd_upload_expr.append('--genome_object_name')
                  cmd_upload_expr.append(obj_infos[1])
                except Exception:
                  # no permission or any issues... then, give up providing genome reference
                  self.logger.info("".join(traceback.format_exc()))
                  pass
            else:
                # it is local... we can simply add reference without copying genome
                cmd_upload_expr.append('--genome_object_name')
                cmd_upload_expr.append(obj_infos[1])
 
        # updated ws name
        cmd_upload_expr.append('--workspace_name')
        cmd_upload_expr.append(tmp_ws)
 
        self.logger.info(" ".join(cmd_upload_expr))
 
        tool_process = subprocess.Popen(" ".join(cmd_upload_expr), stderr=subprocess.PIPE, shell=True, env=eenv)
        stdout, stderr = tool_process.communicate()
        
        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)
 
        if stderr is not None and len(stderr) > 0:
            self.logger.info(stderr)
 
        
        with open("{0}/{1}".format(self.FINAL_DIR,self.FINAL_FN),'r') as et:
          eo = json.load(et)
 
        if 'description' not in expr: 
            expr['description'] = "Filtered Expression Matrix"
        expr['description'] += " : Filtered by '{1}' method ".format(expr['description'], param['method'])
 
        if 'feature_mapping' in expr and 'feature_mapping' in eo:
            expr['feature_mapping'] = eo['feature_mapping']
        expr['data'] = eo['data']
 
        ws.save_objects({'workspace' : param['workspace_name'], 'objects' : [{'type' : 'KBaseFeatureValues.ExpressionMatrix',
                                                                              'data' : expr,
                                                                              'name' : (param['out_expr_object_name'])}]})
 
        ## Upload FeatureSet
        fs ={'elements': {}}
        fs['description'] = "FeatureSet identified by filtering method '{0}' ".format(param['method'])
 
        fs['description'] += "from {0}/{1}".format(param['workspace_name'], param['object_name'])
 
        for g in gl:
          if 'genome_ref' in expr:
            fs['elements'][g] = [expr['genome_ref']]
          else:
            fs['elements'][g] = []
 
        ws.save_objects({'workspace' : param['workspace_name'], 'objects' : [{'type' : 'KBaseCollections.FeatureSet',
                                                                              'data' : fs,
                                                                              'name' : (param['out_fs_object_name'])}]})
        result = {'workspace_name' : param['workspace_name'], 'out_expr_object_name' : param['out_expr_object_name'], 'out_fs_object_name' : param['out_fs_object_name']}
        #END filter_genes

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method filter_genes return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
Example No. 43
    def generate_cummerbund_plots(self, ctx, cummerbundParams):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN generate_cummerbund_plots

        params    = cummerbundParams
        returnVal = params['ws_cummerbund_output']

        #Set up workspace client
        user_token = ctx['token']
        ws_client  = Workspace(url=self.__WS_URL, token=user_token)

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : params['ws_cuffdiff_id'],
            'workspace' : params['workspace_name']
            }])

        # Check if workspace has data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        # Get input data Shock Id and Filename.
        cuffdiff_shock_id = s_res[0]['data']['file']['id']
        cuffdiff_file_name = s_res[0]['data']['file']['file_name']

        #cuffdiff_file_name =None 
        filesize = None

        # Download tar file
        dx = script_util.download_file_from_shock( self.__LOGGER, 
            self.__SHOCK_URL, cuffdiff_shock_id, cuffdiff_file_name,
            self.__SCRATCH, filesize, user_token)
    
        #Decompress tar file and keep it in a directory
        tarfile = join(self.__SCRATCH, cuffdiff_file_name)
        dstnExtractFolder = join(self.__SCRATCH, "cuffdiffData")
        if not os.path.exists(dstnExtractFolder):
            os.makedirs(dstnExtractFolder)

        untarStatus = script_util2.untar_files(self.__LOGGER, tarfile, dstnExtractFolder)
        if untarStatus == False:
            self.__LOGGER.info("Problem extracting the archive")
            return returnVal

        foldersinExtractFolder = os.listdir(dstnExtractFolder)

        if len(foldersinExtractFolder) == 0:
            self.__LOGGER.info("Problem extracting the archive")
            return returnVal

        # Run R script to run cummerbund json and update the cummerbund output json file
        cuffdiff_dir = join(dstnExtractFolder, foldersinExtractFolder[0])
	self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Prepare output object.
        outputobject=dict()

        # Prepare output plot list
        cummerbundplotset=[]

        # List of plots to generate
        plotlist = [
                { 'file': "dispersionplot.R",
                  'title': "Dispersion plot",
                  'description': "Dispersion plot" },
                { 'file': "pcaplot.R",
                  'title': "PCA plot",
                  'description': "PCA plot" },
                { 'file': "fpkmscvplot.R",
                  'title': "FPKM SCV plot",
                  'description': "FPKM SCV plot" }
            ]

        # Iterate through the plotlist and generate the images and json files.
        for plot in plotlist:
            status = script_util2.rplotandupload(self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS,
                plot['file'], self.__SHOCK_URL, self.__HS_URL, user_token,
                cummerbundplotset, plot['title'], plot['description'], cuffdiff_dir)
            if status == False:
                self.__LOGGER.info("Problem generating image and json file - " + plot["file"])


        # Populate the output object
        outputobject['cummerbundplotSet'] = cummerbundplotset

        #TODO: Need to figure out how to get rnaseq experiment id
        outputobject['rnaseq_experiment_id'] = "rnaseq_experiment_id"
        outputobject['cuffdiff_input_id'] = params['ws_cuffdiff_id']

        res = ws_client.save_objects({
            "workspace":params['workspace_name'],
            "objects": [{
                "type":"KBaseRNASeq.cummerbund_output",
                "data":outputobject,
                "name":params["ws_cummerbund_output"]}]
            })

        #END generate_cummerbund_plots

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method generate_cummerbund_plots return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
Example No. 44
    def view_heatmap(self, ctx, args):
        # ctx is the context object
        # return variables are: result
        #BEGIN view_heatmap
        try:
            os.makedirs(self.RAWEXPR_DIR)
        except OSError:
            pass  # directory already exists
        try:
            os.makedirs(self.FLTRD_DIR)
        except OSError:
            pass
        try:
            os.makedirs(self.FINAL_DIR)
        except OSError:
            pass
 
        if self.logger is None:
            self.logger = script_utils.stderrlogger(__file__)
        
        result = {}
        self.logger.info("Loading data")
        token = ctx['token']
 
        eenv = os.environ.copy()
        eenv['KB_AUTH_TOKEN'] = token

        param = args
 
 
        from biokbase.workspace.client import Workspace
        ws = Workspace(url=self.__WS_URL, token=token)
        fc = ws.get_objects([{'workspace': param['workspace_name'], 'name' : param['object_name']}])[0]['data']
        if 'original_data' not in fc:
            raise Exception("FeatureCluster object does not have information for the original ExpressionMatrix")
        oexpr = ws.get_objects([{ 'ref' : fc['original_data']}])[0]

        df2 = pd.DataFrame(oexpr['data']['data']['values'], index=oexpr['data']['data']['row_ids'], columns=oexpr['data']['data']['col_ids'])
#        cmd_dowload_cvt_tsv = [self.FVE_2_TSV, '--workspace_service_url', self.__WS_URL, 
#                                          '--workspace_name', oexpr['info'][7],
#                                          '--object_name', oexpr['info'][1],
#                                          '--working_directory', self.RAWEXPR_DIR,
#                                          '--output_file_name', self.EXPRESS_FN
#                              ]
# 
#        # need shell in this case because the java code is depending on finding the KBase token in the environment
#        #  -- copied from FVE_2_TSV
#        tool_process = subprocess.Popen(" ".join(cmd_dowload_cvt_tsv), stderr=subprocess.PIPE, shell=True, env=eenv)
#        stdout, stderr = tool_process.communicate()
#        
#        if stdout is not None and len(stdout) > 0:
#            self.logger.info(stdout)
# 
#        if stderr is not None and len(stderr) > 0:
#            self.logger.info(stderr)
# 
#        df = pd.read_csv("{0}/{1}".format(self.RAWEXPR_DIR,self.EXPRESS_FN), sep='\t')
#        df2 = df[df.columns[1:]]
#        rn = df[df.columns[0]]
#        df2.index = rn

        # L2 normalization
        df3 = df2.div(df2.pow(2).sum(axis=1).pow(0.5), axis=0)
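        # Row-wise L2 normalization: each feature row x is divided by
        # sqrt(sum(x_i^2)), e.g. [3, 4] -> [0.6, 0.8]; rows then sit on the
        # unit sphere, so the dot products used below act like cosine similarities.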

        # type - ? level, ratio, log-ratio  <---> "untransformed"
        # scale - ? probably: raw, ln, log2, log10
        self.logger.info("Expression matrix type: {0}, scale: {1}".format(oexpr['data']['type'],oexpr['data']['scale'] ))
        if oexpr['data']['type'] == 'level' or oexpr['data']['type'] == 'untransformed': # need to compute fold changes
            if 'scale' not in oexpr['data'] or oexpr['data']['scale'] == 'raw' or oexpr['data']['scale'] == "1.0":
              factor = 0.125
              fc_df = df2 + df2[df2 !=0].abs().min().min() * factor
              if param['control_condition'] in fc_df.columns:
                  fc_df = (fc_df.div(fc_df.loc[:, param['control_condition']], axis=0)).apply(np.log2)
              else:
                  fc_df = (fc_df.div(fc_df.loc[:, fc_df.columns[0]], axis=0)).apply(np.log2)
            else:
              fc_df = df2
              if param['control_condition'] in fc_df.columns:
                  fc_df = fc_df.div(fc_df.loc[:, param['control_condition']], axis=0)
              else:
                  fc_df = fc_df.div(fc_df.loc[:, fc_df.columns[0]], axis=0)
              if oexpr['data']['scale'] == "log10":
                  fc_df = fc_df/np.log10(2)
              elif oexpr['data']['scale'] == "ln":
                  fc_df = fc_df/np.log(2)
              else:
                  pass
        elif oexpr['data']['type'] == 'ratio':
            fc_df = df2.apply(np.log2)
        elif oexpr['data']['type'] == 'log-ratio':
            fc_df = df2
            if oexpr['data']['scale'] == "log10":
                fc_df = fc_df/np.log10(2)
            elif oexpr['data']['scale'] == "ln":
                fc_df = fc_df/np.log(2)
            else:
                pass

        else: # do the same thing with simple level or untransformed
            if 'scale' not in oexpr['data'] or oexpr['data']['scale'] == 'raw' or oexpr['data']['scale'] == "1.0":
              factor = 0.125
              fc_df = df2 + df2[df2 !=0].abs().min().min() * factor
              if param['control_condition'] in fc_df.columns:
                  fc_df = (fc_df.div(fc_df.loc[:, param['control_condition']], axis=0)).apply(np.log2)
              else:
                  fc_df = (fc_df.div(fc_df.loc[:, fc_df.columns[0]], axis=0)).apply(np.log2)
            else:
              fc_df = df2
              if param['control_condition'] in fc_df.columns:
                  fc_df = fc_df.div(fc_df.loc[:, param['control_condition']], axis=0)
              else:
                  fc_df = fc_df.div(fc_df.loc[:, fc_df.columns[0]], axis=0)
              if oexpr['data']['scale'] == "log10":
                  fc_df = fc_df/np.log10(2)
              elif oexpr['data']['scale'] == "ln":
                  fc_df = fc_df/np.log(2)
              else:
                  pass
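        # The log10(2) and log(2) divisions above are a change of base:
        # log2(x) = log10(x) / log10(2) = ln(x) / ln(2), so matrices stored on
        # a log10 or natural-log scale end up as log2 fold changes.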
       
        self.logger.info("Compute cluster statistics")

        cl = {}
        afs = []
        cid = 1

        c_stat = pd.DataFrame()
        for cluster in fc['feature_clusters']:
         
          try:
            fs = cluster['id_to_pos'].keys()
          except KeyError:
            continue # cluster has no id_to_pos mapping (no feature set found)

          fsn = "Cluster_{0}".format(cid)
          cid +=1
          c_stat.loc[fsn,'size'] = len(fs)
          if 'meancor' in cluster:
              c_stat.loc[fsn,'mcor'] = cluster['meancor']
          else:
            pass
            # TODO: Add mean cor calculation later
            #raise Exception("Mean correlation is not included in FeatureCluster object") # now it is NaN

          if 'quantile' in param:
              c_stat.loc[fsn,'stdstat'] = fc_df.loc[fs,].std(axis=1).quantile(float(param['quantile']))
          else:
              c_stat.loc[fsn,'stdstat'] = fc_df.loc[fs,].std(axis=1).quantile(0.75)
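          # stdstat: a quantile (default 0.75) of the cluster members' fold-change
          # standard deviations across conditions; larger values mark clusters
          # whose genes vary more between conditions.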
         

          if df3.loc[fs,].shape[0] < 1: # empty cluster
            continue
          cl[fsn] = fs
          #afs.extend(fs)

          #c1 = df3.loc[fs,].sum(axis=0)
          #c1 = c1 / np.sqrt(c1.pow(2).sum())
          #if(len(cl.keys()) == 1):
          #  centroids = c1.to_frame(fsn).T
          #else:
          #  centroids.loc[fsn] = c1

        # now we have centroids and statistics
        # let's subselect clusters
        min_features = 200
        if 'min_features' in param :
          min_features = param['min_features']
        
        c_stat.loc[:,'nmcor'] = c_stat.loc[:,'mcor'] / c_stat.loc[:,'mcor'].max()
        c_stat.loc[:,'nstdstat'] = c_stat.loc[:,'stdstat'] / c_stat.loc[:,'stdstat'].max()
        
        if 'use_norm_weight' in param and param['use_norm_weight'] != 0:
            if 'quantile_weight' in param:
                c_stat.loc[:,'weight'] = c_stat.loc[:,'nmcor'] + float(param['quantile_weight']) * c_stat.loc[:,'nstdstat']
            else:
                c_stat.loc[:,'weight'] = c_stat.loc[:,'nmcor'] + 1.0                             * c_stat.loc[:,'nstdstat']
        else:
            if 'quantile_weight' in param:
                c_stat.loc[:,'weight'] = c_stat.loc[:,'mcor'] + float(param['quantile_weight']) * c_stat.loc[:,'stdstat']
            else:
                c_stat.loc[:,'weight'] = c_stat.loc[:,'mcor'] + 0.1                             * c_stat.loc[:,'stdstat']
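        # weight = mean correlation + w * std statistic (optionally both scaled
        # by their maxima), so clusters that are both internally coherent and
        # variable across conditions sort to the top below.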

        c_stat.sort_values('weight', inplace=True, ascending=False)

        pprint(c_stat)

        centroids = pd.DataFrame()
        for i in range(c_stat.shape[0]):
            fsn = c_stat.index[i]
            fs = cl[fsn]
            if i != 0 and len(afs) + len(fs) > min_features:
                break
           
            afs.extend(fs)

            c1 = df3.loc[fs,].sum(axis=0)
            c1 = c1 / np.sqrt(c1.pow(2).sum())
            if(centroids.shape[0] < 1):
              centroids = c1.to_frame(fsn).T
            else:
              centroids.loc[fsn] = c1
           
        pprint(centroids)
        
        if len(cl.keys()) == 0:
            raise Exception("No feature ids were mapped to dataset or no clusters were selected")
        
        # dataset centroid
        dc = df3.loc[afs,].sum(axis=0)
        dc = dc / np.sqrt(dc.pow(2).sum())
    
        
        self.logger.info("Ordering Centroids and Data")
        # the cluster centroid farthest from the dataset centroid
        # (note: this rebinds fc, no longer needed as the FeatureClusters dict)
        fc = (centroids * dc).sum(axis=1).idxmin()
        # the centroid farthest from fc
        ffc = (centroids * centroids.loc[fc,]).sum(axis=1).idxmin()
        
        # major direction to order on unit ball space
        md = centroids.loc[ffc,] - centroids.loc[fc,]
        
        # unnormalized projection onto the major direction (the constant |md|
        # factor is dropped because it is the same for every point)
        corder = (centroids * md).sum(axis=1).sort_values() # cluster order
        coidx = corder.index
        
        dorder = (df3.loc[afs,] * md).sum(axis=1).sort_values() # data order
        
        # get first fs table    
        fig_properties = {"xlabel" : "Conditions", "ylabel" : "Features", "xlog_mode" : "none", "ylog_mode" : "none", "title" : "Log Fold Changes", "plot_type" : "heatmap", 'ygroup': []}
        fig_properties['ygtick_labels'] = coidx.tolist()

        if 'fold_change' in param and param['fold_change'] == 1:
            frange = 2
            if 'fold_change_range' in param:
                frange = float(param['fold_change_range'])
            final=fc_df.loc[dorder.loc[cl[coidx[0]],].index,]
            fig_properties['ygroup'].append(final.shape[0])
            
            for i in range(1,len(coidx)):
                tf = fc_df.loc[dorder.loc[cl[coidx[i]],].index,]
                fig_properties['ygroup'].append(tf.shape[0])
                final = final.append(tf)

            if 'fold_cutoff' in param and param['fold_cutoff'] == 1:
                final[final > frange] = frange
                final[final < - frange] = - frange
            else:
                fc_df0b = final.sub(final.min(axis=1), axis=0)
                final = (fc_df0b.div(fc_df0b.max(axis=1), axis=0) - 0.5) * 2 * frange
        else:
            final=df2.loc[dorder.loc[cl[coidx[0]],].index,]
            fig_properties['ygroup'].append(final.shape[0])
            
            for i in range(1,len(coidx)):
                tf = df2.loc[dorder.loc[cl[coidx[i]],].index,]
                fig_properties['ygroup'].append(tf.shape[0])
                final = final.append(tf)
        
        ## build the FloatDataTable (FDT) holding the heatmap values
        fdt = {'row_labels': [], 'column_labels': [], "data": [[]]}
        #fdt = OrderedDict(fdt)
        fdt['data'] = final.T.as_matrix().tolist() # transposed: conditions become rows
        fdt['row_labels'] = final.columns.tolist()
        fdt['column_labels'] = final.index.tolist()
        # TODO: Add group label later
        fdt['id'] = "{0}.fdt".format(param['out_figure_object_name'])
 
        self.logger.info("Saving the results")
        sstatus = ws.save_objects({'workspace' : param['workspace_name'], 'objects' : [{'type' : 'MAK.FloatDataTable',
                                                                              'data' : fdt,
                                                                              'name' : "{0}.fdt".format(param['out_figure_object_name'])}]})

        data_ref = "{0}/{1}/{2}".format(sstatus[0][6], sstatus[0][0], sstatus[0][4])
        fig_properties['data_ref'] = data_ref
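        # sstatus[0] is the saved object's object_info tuple; in the workspace
        # API layout, index 6 is the workspace id, 0 the object id and 4 the
        # version, giving the usual "wsid/objid/version" reference form.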

        sstatus = ws.save_objects({'workspace' : param['workspace_name'], 'objects' : [{'type' : 'CoExpression.FigureProperties',
                                                                              'data' : fig_properties,
                                                                              'name' : (param['out_figure_object_name'])}]})
        result = fig_properties
        #END view_heatmap

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method view_heatmap return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
Example #45
def run_filter_genes(workspace_service_url=None,
                     param_file=None,
                     level=logging.INFO,
                     logger=None):
    """
    Narrative Job Wrapper script to execute coex_filter
    
    Args:
        workspace_service_url: URL for the KBase Workspace service
        param_file: JSON parameter file (includes workspace_name and the
            object_name of the ExpressionMatrix to filter)
        level: Logging level, defaults to logging.INFO.
    
    Returns:
        Output is written back in WS
    
    Authors:
        Shinjae Yoo
    
    """

    try:
        os.makedirs(RAWEXPR_DIR)
    except OSError:
        pass  # directory may already exist
    try:
        os.makedirs(FLTRD_DIR)
    except OSError:
        pass
    try:
        os.makedirs(FINAL_DIR)
    except OSError:
        pass

    if logger is None:
        logger = script_utils.stderrlogger(__file__)

    logger.info(
        "Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV")
    token = os.environ.get("KB_AUTH_TOKEN")

    with open(param_file) as paramh:
        param = json.load(paramh)

    cmd_dowload_cvt_tsv = [
        FVE_2_TSV, '--workspace_service_url', workspace_service_url,
        '--workspace_name', param['workspace_name'], '--object_name',
        param['object_name'], '--working_directory', RAWEXPR_DIR,
        '--output_file_name', EXPRESS_FN
    ]

    # need shell in this case because the java code is depending on finding the KBase token in the environment
    #  -- copied from FVE_2_TSV
    tool_process = subprocess.Popen(" ".join(cmd_dowload_cvt_tsv),
                                    stderr=subprocess.PIPE,
                                    shell=True)
    stdout, stderr = tool_process.communicate()

    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        logger.info(stderr)

    logger.info("Identifying differentially expressed genes")

    ## Prepare sample file
    # detect num of columns
    with open("{0}/{1}".format(RAWEXPR_DIR, EXPRESS_FN), 'r') as f:
        fl = f.readline()
    ncol = len(fl.split('\t'))

    with open("{0}/{1}".format(RAWEXPR_DIR, SAMPLE_FN), 'wt') as s:
        s.write("0")
        for j in range(1, ncol - 1):
            s.write("\t{0}".format(j))
        s.write("\n")
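    # The sample file is a single tab-separated line "0\t1\t...\t(ncol-2)",
    # i.e. one label per data column; coex_filter reads it via -s below
    # (the exact grouping semantics are assumed from that command line).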

    ## Run coex_filter
    cmd_coex_filter = [
        COEX_FILTER, '-i', "{0}/{1}".format(RAWEXPR_DIR, EXPRESS_FN), '-o',
        "{0}/{1}".format(FLTRD_DIR, FLTRD_FN), '-m', param['method'], '-s',
        "{0}/{1}".format(RAWEXPR_DIR, SAMPLE_FN), '-x',
        "{0}/{1}".format(RAWEXPR_DIR, GENELST_FN), '-t', 'y'
    ]
    if 'num_features' in param:
        cmd_coex_filter.append("-n")
        cmd_coex_filter.append(str(param['num_features']))

    if 'num_features' not in param and 'p_value' in param:
        cmd_coex_filter.append("-p")
        cmd_coex_filter.append(str(param['p_value']))

    if 'p_value' not in param and 'num_features' not in param:
        logger.error("One of p_value or num_features must be defined")
        sys.exit(2)

    #if 'p_value' in param and 'num_features' in param:
    #  logger.error("Both of p_value and num_features cannot be defined together");
    #  sys.exit(3)

    tool_process = subprocess.Popen(cmd_coex_filter, stderr=subprocess.PIPE)
    stdout, stderr = tool_process.communicate()

    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        logger.info(stderr)

    ## Header correction
    with open("{0}/{1}".format(FLTRD_DIR, FLTRD_FN), 'r') as ff:
        fe = ff.readlines()
    with open("{0}/{1}".format(FLTRD_DIR, FLTRD_FN), 'w') as ff:
        ff.write(
            fl)  # use original first line that has correct header information
        fe.pop(0)
        ff.writelines(fe)

    ## Upload FVE
    from biokbase.workspace.client import Workspace
    ws = Workspace(url=workspace_service_url,
                   token=os.environ['KB_AUTH_TOKEN'])
    expr = ws.get_objects([{
        'workspace': param['workspace_name'],
        'name': param['object_name']
    }])[0]['data']

    # use the referenced genome object's workspace name, since the genome may
    # live in a different workspace than the working one
    cmd_upload_expr = [
        TSV_2_FVE, '--workspace_service_url', workspace_service_url,
        '--object_name', param['out_expr_object_name'], '--working_directory',
        FINAL_DIR, '--input_directory', FLTRD_DIR, '--output_file_name',
        FINAL_FN
    ]
    tmp_ws = param['workspace_name']
    if 'genome_ref' in expr:
        cmd_upload_expr.append('--genome_object_name')
        obj_infos = ws.get_object_info_new(
            {"objects": [{
                'ref': expr['genome_ref']
            }]})

        if len(obj_infos) < 1:
            logger.error("Couldn't find {0} from the workspace".format(
                expr['genome_ref']))
            raise Exception("Couldn't find {0} from the workspace".format(
                expr['genome_ref']))

        obj_info = obj_infos[0]
        cmd_upload_expr.append(obj_info[1])
        tmp_ws = obj_info[7]
        logger.info("{0} => {1} / {2}".format(expr['genome_ref'], tmp_ws,
                                              obj_info[1]))

    # updated ws name
    cmd_upload_expr.append('--workspace_name')
    cmd_upload_expr.append(tmp_ws)

    tool_process = subprocess.Popen(" ".join(cmd_upload_expr),
                                    stderr=subprocess.PIPE,
                                    shell=True)
    stdout, stderr = tool_process.communicate()

    if stdout is not None and len(stdout) > 0:
        logger.info(stdout)

    if stderr is not None and len(stderr) > 0:
        logger.info(stderr)

    with open("{0}/{1}".format(FINAL_DIR, FINAL_FN), 'r') as et:
        eo = json.load(et)

    if 'description' in expr:
        expr['description'] = "{0}, coex_filter by {1}".format(
            expr['description'], " ".join(cmd_coex_filter))
    if 'feature_mapping' in expr:
        expr['feature_mapping'] = eo['feature_mapping']
    expr['data'] = eo['data']

    ws.save_objects({
        'workspace':
        param['workspace_name'],
        'objects': [{
            'type': 'KBaseFeatureValues.ExpressionMatrix',
            'data': expr,
            'name': (param['out_expr_object_name'])
        }]
    })

    ## Upload FeatureSet
    fs = {
        'description':
        'Differentially expressed genes generated by {0}'.format(
            " ".join(cmd_coex_filter)),
        'elements': {}
    }

    with open("{0}/{1}".format(RAWEXPR_DIR, GENELST_FN), 'r') as glh:
        gl = glh.readlines()
    gl = [x.strip('\n') for x in gl]

    for g in gl:
        if 'genome_ref' in expr:
            fs['elements'][g] = [expr['genome_ref']]
        else:
            fs['elements'][g] = []

    ws.save_objects({
        'workspace':
        param['workspace_name'],
        'objects': [{
            'type': 'KBaseCollections.FeatureSet',
            'data': fs,
            'name': (param['out_fs_object_name'])
        }]
    })
Example #46
def net_clust(args):
    ###
    # download ws object and convert them to csv
    wsd = Workspace(url=args.ws_url, token=os.environ.get('KB_AUTH_TOKEN'))
    lseries = wsd.get_object({'id' : args.inobj_id,
                  'type' : 'KBaseExpression.ExpressionSeries', 
                  'workspace' : args.ws_id})['data']

    if lseries is None:
        raise COEXException("Object {} not found in workspace {}".format(args.inobj_id, args.ws_id))

    samples, sids, genome_id = {}, [], ""
    # assume only one genome id
    for gid in sorted(lseries['genome_expression_sample_ids_map'].keys()):
        genome_id = gid
        for samid in lseries['genome_expression_sample_ids_map'][gid]:
            sids.append({'ref': samid})
        samples = wsd.get_objects(sids)
        break

    cif = open(args.exp_fn, 'w')
    header = ",".join([s['data']['source_id'] for s in samples])
    cif.write(header + "\n")
    gids = samples[0]['data']['expression_levels'].keys()  # each sample has same gids
    for gid in sorted(gids):
        line = gid + ","
        line += ",".join([str(s['data']['expression_levels'][gid]) for s in samples])
        cif.write(line + "\n")
    cif.close()
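    # The CSV written above has a header of sample source ids and one line per
    # gene ("<gene_id>,<v1>,<v2>,..."); note the header row has no leading cell
    # for the gene-id column, so it is one field shorter than the data rows.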


    ###
    # generate network and cluster
    net_cmd_lst = ['coex_net', '-i', args.exp_fn]
    if (args.nmethod    is not None): 
        net_cmd_lst.append("-m")
        net_cmd_lst.append(args.nmethod)
    if (args.cut_off    is not None): 
        net_cmd_lst.append("-c")
        net_cmd_lst.append(args.cut_off)
    if (args.net_fn     is not None):
        net_cmd_lst.append("-o")
        net_cmd_lst.append(args.net_fn)
    p1 = Popen(net_cmd_lst, stdout=PIPE)
    out_str = p1.communicate()
    if out_str[0] is not None : print out_str[0]
    if out_str[1] is not None : print >> sys.stderr, out_str[1]
    net_cmd = " ".join(net_cmd_lst)
   
   
    clust_cmd_lst = ['coex_cluster2', '-i', args.exp_fn]
    if (args.cmethod    is not None):
        clust_cmd_lst.append("-c")
        clust_cmd_lst.append(args.cmethod)
    if (args.nmethod    is not None):
        clust_cmd_lst.append("-n")
        clust_cmd_lst.append(args.nmethod)
    if (args.k          is not None):
        clust_cmd_lst.append("-s")
        clust_cmd_lst.append(args.k)
    if (args.clust_fn   is not None):
        clust_cmd_lst.append("-o")
        clust_cmd_lst.append(args.clust_fn)
    p1 = Popen(clust_cmd_lst, stdout=PIPE)
    out_str = p1.communicate()
    if out_str[0] is not None : print out_str[0]
    if out_str[1] is not None : print >> sys.stderr, out_str[1]
    clust_cmd = " ".join(clust_cmd_lst)

   
    ###
    # Create network object
    #generate Networks datasets
    net_ds_id = args.inobj_id + ".net"
    clt_ds_id = args.inobj_id + ".clt"
 
    datasets = [
      {
        'network_type' : 'FUNCTIONAL_ASSOCIATION',
        'taxons' : [ genome_id ],
        'source_ref' : 'WORKSPACE',
        'name' : net_ds_id,
        'id' : net_ds_id,
        'description' : "Coexpression network object of " + args.inobj_id,
        'properties' : {
          'original_data_type' : 'workspace',
          'original_ws_id' : args.ws_id,
          'original_obj_id' : args.inobj_id,
          'coex_net_cmd' : net_cmd
        }
      },
      {
        'network_type' : 'FUNCTIONAL_ASSOCIATION',
        'taxons' : [ genome_id ],
        'source_ref' : 'WORKSPACE',
        'name' : clt_ds_id,
        'id' : clt_ds_id,
        'description' : "Coexpression cluster object of " + args.inobj_id,
        'properties' : {
          'original_data_type' : 'workspace',
          'original_ws_id' : args.ws_id,
          'original_obj_id' : args.inobj_id,
          'coex_clust_cmd' : clust_cmd
        }
      }
    ]
 
 
    # process coex network file
    nc = Node()
 
    cnf = open(args.net_fn, 'r')
    cnf.readline() # skip header
    for line in cnf:
        line = line.strip()
        line = line.replace('"', '')
        values = line.split(',')
        if values[0] != values[1]: # only edges between distinct genes are meaningful
            nc.add_edge(float(values[2]), net_ds_id, values[0], 'GENE', values[1], 'GENE', 0.0)
 
 
    # process coex cluster file
    cnf = open(args.clust_fn, 'r')
    cnf.readline() # skip header
    for line in cnf:
        line = line.strip()
        line = line.replace('"', '')
        values = line.split(',')
        nc.add_edge(1.0, clt_ds_id, values[0], 'GENE', "cluster." + values[1], 'CLUSTER', 0.0)
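    # Each cluster assignment becomes a GENE -> "cluster.N" CLUSTER edge with
    # weight 1.0, so the saved network is bipartite between genes and clusters.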
 
    # generate Networks object
    net_object = {
      'datasets' : datasets,
      'nodes' : nc.nodes,
      'edges' : nc.edges,
      'user_annotations' : {},
      'name' : 'Coexpression Network',
      'id' : args.outobj_id,
      'properties' : {
        'graphType' : 'edu.uci.ics.jung.graph.SparseMultigraph'
      }
    }
 
    # Store results object into workspace
    wsd.save_objects({'workspace' : args.ws_id, 'objects' : [{'type' : 'KBaseNetworks.Network', 'data' : net_object, 'name' : args.outobj_id, 'meta' : {'org_obj_id' : args.inobj_id, 'org_ws_id' : args.ws_id}}]})
 
    if args.del_tmps == "true":
        os.remove(args.exp_fn)
        os.remove(args.net_fn)
        os.remove(args.clust_fn)
Example #47
    def create_interactive_heatmap_de_genes(self, ctx,
                                            interactiveHeatmapParams):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_interactive_heatmap_de_genes
        fparams = interactiveHeatmapParams
        #returnVal = "ttt"
        #Set up workspace client
        user_token = ctx['token']
        workspace = fparams['workspace_name']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        system_params = {}
        system_params['token'] = user_token
        system_params['ws_url'] = self.__WS_URL
        system_params['logger'] = self.__LOGGER
        system_params['shock_url'] = self.__SHOCK_URL
        system_params['hs_url'] = self.__HS_URL
        system_params['scratch'] = self.__SCRATCH
        system_params['rscripts'] = self.__RSCRIPTS
        system_params['workspace'] = workspace

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name': fparams['ws_cuffdiff_id'],
            'workspace': fparams['workspace_name']
        }])

        #Check if the workspace returned any data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            returnVal = ""
            return [returnVal]
        cuffdiff_dir = script_util2.extract_cuffdiff_data(
            self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)
        #cuffdiff_dir = "/kb/module/work/nnc/cuffdiff"
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        #if (cuffdiff_dir is False):
        #    return returnVal
        fparams['cuffdiff_dir'] = cuffdiff_dir
        fparams['infile'] = join(cuffdiff_dir, "gene_exp.diff")
        fparams['outfile'] = join(system_params['scratch'],
                                  "gene_exp.diff.filter")

        filtered_matrix = script_util2.filter_expression_matrix(
            fparams, system_params)
        self.__LOGGER.info("matrix is " + filtered_matrix)

        fparams['infile'] = join(system_params['scratch'],
                                 "gene_exp.diff.filter")
        fparams['outfile'] = join(system_params['scratch'],
                                  "gene_exp.diff.filter.genelist")

        genelist_filtered_matrix_file = script_util2.get_gene_list_from_filter_step(
            fparams)

        # Prepare output object.
        outjson = False

        rparams = {}
        rparams['genelist'] = filtered_matrix
        rparams['cuffdiff_dir'] = fparams['cuffdiff_dir']
        rparams['outpng'] = join(system_params['scratch'], "heatmap.png")
        rparams['imageheight'] = 1600
        rparams['imagewidth'] = 800
        rparams['plotscript'] = join(system_params['rscripts'],
                                     "heatmapplotinteractive.R")
        rparams['include_replicates'] = 1
        rparams['outmatrix'] = join(system_params['scratch'], "outmatrix")

        roptstr_basic_heatmap_rep = script_util2.get_command_line_heatmap_basic(
            rparams)

        # Run the R script to generate the cummerbund json and update the output json file
        # Prepare output object.
        outputobject = dict()

        # Prepare output plot list
        cummerbundplotset = []

        # List of plots to generate
        plotlist = [{
            'roptstr': roptstr_basic_heatmap_rep,
            'title': "Heatmap",
            'description': "Heatmap",
            'exp': fparams['ws_expression_matrix_id']
        }]
        fparams['cummerbundplotset'] = cummerbundplotset
        # Iterate through the plotlist and generate the images and json files.
        for plot in plotlist:
            fparams['title'] = plot['title']
            fparams['description'] = plot['description']

            status = script_util2.rplotanduploadinteractive(
                system_params, fparams, rparams, plot['roptstr'])
            if status is False:
                self.__LOGGER.info(
                    "Problem generating image and json file - " +
                    plot["roptstr"])
            else:
                self.__LOGGER.info(status)

                outjson = status
                with open("{0}/{1}".format(self.__SCRATCH, outjson),
                          'r') as et2:
                    eo2 = json.load(et2)
                    genome_ref = s_res[0]['data']['genome_id']
                    eo2['type'] = 'untransformed'
                    #eo2['genome_ref'] = genome_ref
                    self.__LOGGER.info(workspace + self.__SCRATCH + outjson +
                                       plot['exp'])
                    ws_client.save_objects({
                        'workspace':
                        workspace,
                        'objects': [{
                            'type': 'KBaseFeatureValues.ExpressionMatrix',
                            'data': eo2,
                            'name': plot['exp']
                        }]
                    })

        returnVal = fparams['ws_expression_matrix_id']

        #END create_interactive_heatmap_de_genes

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError(
                'Method create_interactive_heatmap_de_genes return value ' +
                'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
Example #48
    def run_Coveringarray(self, ctx, params):
        """
        This example function accepts any number of parameters and returns results in a KBaseReport
        :param params: instance of mapping from String to unspecified object
        :returns: instance of type "ReportResults" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_Coveringarray

        # For each 'container_object', iterate over its options; the number of
        # objects and the options per object determine the covering array's
        # factor count and per-factor level counts.
        strength = 2
        valueList = []
        nameList = {}
        sampleSize = 0
        # nameList maps each factor name to its number of levels (len(option_2)), assigned below

        # [params]
        #   [container_object]
        #       [variable length x1,x2,xn]
        #           [name]
        #           [values]
        #               [variable length x1,x2,xn]
        # container_object is a list because 'allow multiple' = true;
        # each entry in the container_object list has its own grouping of settings 1,2,3.
        # All entry forms are free-form text boxes associated with a known id:
        # strength and factors are measured through the volume of known ids,
        # and pairs through combinations of known ids, so user input is never
        # used by the program to keep track of order. After the covering-array
        # output is obtained, the container_object list is used to swap ids
        # back to the text-form entries. An illustrative payload follows.
        # strength = params["strength"]
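        # Hypothetical params payload (illustrative only; the field names
        # follow the code below, the values are made up):
        # params = {
        #     'option_0': '2',                                  # strength
        #     'container_object': [
        #         {'option_1': 'cpd00027', 'option_2': ['0', '5', '10']},
        #         {'option_1': 'cpd00013', 'option_2': ['0', '1']},
        #     ],
        #     'input_media': '',
        #     'evaluation_options': 'append_media',
        #     'workspace_name': 'my_ws',
        #     'output_media': 'ca_media',
        #     'output_media_check': 1,
        #     'output_json_check': 0,
        # }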
        strength = int(params['option_0'])
        # try/except guards against a missing or unreadable media object

        if params['input_media'] == "" or params['input_media'] is None:
            try:

                for setting in params['container_object']:
                    if setting['option_1'] != "":
                        nameList[setting['option_1']] = len(setting['option_2'])
                        for option in setting['option_2']:
                            valueList.append(option)
            except Exception:
                print("Failed to read in non-media input")

            # each params["container_object"][x] has a name
            # and another list of strings
        else:
            #try/except for media object retrieval failure
            try:
                medianame = params['workspace_name']+"/"+str(params['input_media'])

                media = self.dfu.get_objects({'object_refs': [medianame]})['data'][0]['data']

                # print('\n\n ======' + str(media.items()) + '=======\n\n')
                # for modnames in params['container_object']
                #     if modnames['option_0'] == compound['name']
                #         compo
                print(media['id'])

                mediaComps = media.get("mediacompounds")

                # print('\n\n ======' + str(mediaComps.items()) + '=======\n\n')
                crefMatch = 0
                print("\n\n==cref match init"+"==\n\n")
            except Exception:
                print("Media read in failure")


            try:
                if params['evaluation_options'] == 'append_media':
                    print("\n\n== Append Element Mode ==\n\n")
                    for compound in mediaComps:
                        
                        cref = compound['compound_ref'].split("/")[-1]
                        nameList[cref] = 2
                        valueList.append(compound['maxFlux'])
                        valueList.append(-100)

                    for setting in params['container_object']:
                        if setting['option_1'] != "":
                            nameList[setting['option_1']] = len(setting['option_2'])
                            for option in setting['option_2']:
                                valueList.append(option)
            except Exception:
                print("Append media option failure")

            try:
                if params['evaluation_options'] == 'overwrite_media':
                    ow = 0
                    print("\n\n== Overwrite Media Elements Mode ==\n\n")

                    for compound in mediaComps:
                        ow = 0

                        cref = compound['compound_ref'].split("/")[-1]

                        for setting in params['container_object']:
                            if cref == setting['option_1']:
                                ow = 1
                                nameList[cref] = len(setting['option_2'])
                                for value in setting['option_2']:
                                    valueList.append(value)
                        if ow == 0:
                            nameList[cref] = 2
                            valueList.append(compound['maxFlux'])
                            valueList.append(-100)
            except Exception:
                print("Overwrite media option failure")

            try:
                if params['evaluation_options'] == 'isolate_media':
                    print("\n\n== Isolate Media Elements Mode ==\n\n")

                    for compound in mediaComps:
                        cref = compound['compound_ref'].split("/")[-1]

                        for setting in params['container_object']:
                            if cref == setting['option_1']:
                                nameList[cref] = 2
                                valueList.append(compound['maxFlux'])
                                valueList.append(-100)
            except Exception:
                print("Isolate media option failure")

        sampleSize = len(nameList)
        print("\n\n== samplesize adjusted to " + str(sampleSize) + " ==\n\n")

        formattedParams = str(strength) + '\n' + str(sampleSize) + '\n'

        for name in nameList:
            formattedParams += str(nameList[name]) + ' 1\n'
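        # Assumed input layout for the wrapped 'cover' tool, reconstructed from
        # the lines built above: strength on line 1, factor count on line 2,
        # then one "<levels> 1" line per factor, e.g. for strength 2 and
        # factors with 3, 2 and 2 levels:
        #   2
        #   3
        #   3 1
        #   2 1
        #   2 1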

        inputfile = open("inputfile.txt", 'w')

        inputfile.write(formattedParams)

        inputfile.close()

        inputfile = open("inputfile.txt", 'r')

        print("\n\n============== Formatted Input Begin ===============\n\n")

        for line in inputfile:
            print(line)

        inputfile.close()

        print("\n\n============== Formatted Input End ===============\n\n")

        try:
            os.system('/kb/module/./cover inputfile.txt -F')
            
            outputfile = open("anneal.out", 'r')
            rawout = " "

            for line in outputfile:
                rawout += line

            outputfile.close()

            outputfile = open("anneal.out", 'r')
        except Exception:
            print("Wrapped cover tool failure")

        finaloutputText = " "
        trimmedOutFile = ""


        #if json out do this, elif media out do that, else...
        matrixData = {
            "row_ids": [],
            "column_ids": [],
            "row_labels": ['combinations'],
            "column_labels": ['compounds'],
            "row_groups_ids": ['1'],
            "column_groups_ids": ['1'],
            "data": [[]]
        }

        for name in nameList:
            finaloutputText += name
            finaloutputText += " "
            matrixData["column_ids"].append(name)


        finaloutputText += "\n ==================== \n"

      

        # count by line instead: look for an empty line followed by a length-1 line to start
        matrixReadFlag = 0
        outPutLead = 0
        n = 1
        for line in outputfile:

            if outPutLead != 0 and matrixReadFlag == 10:
                matrixData["row_ids"].append('row'+str(n))
                n+=1
                for c in line.split():
                    if len(line) > 2 and c != str(outPutLead):

                        finaloutputText += str(valueList[int(c)])
                        finaloutputText += ","
                        trimmedOutFile += str(valueList[int(c)])
                        trimmedOutFile += ","
                    else:

                        finaloutputText += c
                        finaloutputText += ","
                        trimmedOutFile += c
                        trimmedOutFile += ","

                finaloutputText = finaloutputText[:-1]
                finaloutputText += "\n"

            if matrixReadFlag == 3:
                outPutLead = line.strip()
                print(outPutLead)
                print("\n\n" + line + "\n\n")
                finaloutputText += "Sample Size: " + outPutLead + " \n"
                matrixReadFlag = 10

            if(line == "\n" and len(line) == 1):
                matrixReadFlag += 1


        matrixData["data"]=[[] for i in range(len(matrixData["row_ids"]))]

        listversion = [n.strip() for n in trimmedOutFile.split(',')]

        for row in range(len(matrixData["row_ids"])):
            for column in range(len(matrixData["column_ids"])):

                matrixData["data"][row].append(listversion[column+(row)*len(matrixData["column_ids"])])
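                # flat row-major indexing: the (row, column) entry lives at
                # listversion[column + row * len(matrixData["column_ids"])]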


        if params['evaluation_options'] == 'isolate_media':
            unchangedmedialist = []

            for compound in mediaComps:
                cref = compound['compound_ref'].split("/")[-1]

                if cref not in matrixData['column_ids']:
                    unchangedmedialist.append([cref, compound['maxFlux']])

            for item in unchangedmedialist:
                matrixData['column_ids'].append(item[0])
                for row in matrixData["data"]:
                    row.append(item[1])

        #replace finaloutput text script with sourcing from matrixdata

        print("\n\n\n FINAL OUTPUT\n" + finaloutputText + "\nFINAL OUTPUT  \n\n\n" + rawout)


        if params['output_media'] is not None or params['output_json_check'] == 1:

            workspaceClient = Workspace(self.workspaceURL, token=ctx['token'])
            # try/except for JSON output object creation
            try:
                matrixObject = workspaceClient.save_objects({'workspace': params['workspace_name'],
                                                             'objects': [{'name': params['output_media'],
                                                                          'type': 'MAK.StringDataTable',
                                                                          'data': matrixData}]})
            except Exception:
                print("JSON output object creation failed")


        test_media = {
            'mediacompounds':[{'compound_ref':'testref1','concentration':100,'minFlux':0,'maxFlux':0},{'compound_ref':'testref2','concentration':100,'minFlux':100,'maxFlux':100}],
            'isMinimal':0,
            'isDefined':0,
            'type':'Undefined',
            'name':'testname',
            'id':'testid'
        }
          #  def __copy__(self):
           #     return MediaCompound(self.compound_ref,self.concentration,self.minFlux,self.maxFlux)
            #def __deepcopy__(self,memo):
             #   return MediaCompound(copy.deepcopy(self.compound_ref,self.concentration,self.minFlux,self.maxFlux,memo))

## IDEAS 10/28/21: give a default value for compound_reference.
## Remove deepcopies and pass references to preserve the original object.
## Call workspace save on each piece before assembling.
        def make_compound(compound_ref, concentration, minFlux, maxFlux):
            # Builds one entry of a KBaseBiochem.Media 'mediacompounds' list.
            # The hard-coded "489/6/8" prefix targets
            # KBaseBiochem.Biochemistry.compounds.*.id; the first section is the
            # workspace, and the workspace client presumably resolves it via
            # get_objects2.
            mediaCompound = {
                'compound_ref': "489/6/8/" + "compounds/" + "id/" + compound_ref,
                'concentration': concentration,
                'minFlux': minFlux,
                'maxFlux': maxFlux
            }
            # Potential failure mode: an unrecognized "cpdXXX" reference, where
            # the media object's metadata shows "null" for the extracted-ids
            # field and carries no data about the compounds (reference
            # data/pointers lost in the media-creation process; the workaround
            # here is more deepcopies).
            return mediaCompound

        if params['output_media'] is not None and params['output_media_check'] == 1:
            media_compounds_data = []
            media_data = {}
            media_data_list = []

            for index1, case in enumerate(matrixData['data']):
                # ISSUE: on CDG tests, sizes go from 20 -> 9 with 3 compound
                # isolations; why do ~50% of reactions change to sub-zero?
                media_compounds_data = []
                # NOTE: is object creation tied to the test suite? Filtering out
                # non-positive compounds for trimmed makeups will conflict with
                # the Covering Array Tool test suite's expected outcome.
                for index2, compound in enumerate(case):
                    if float(compound) > 0:
                        media_compound = make_compound(matrixData['column_ids'][index2],.001,-100,float(compound))
                        media_compounds_data.append(copy.deepcopy(media_compound))
                media_data = {
                'mediacompounds':copy.deepcopy(media_compounds_data),
                'isMinimal':0,
                'isDefined':0,
                'type':'Undefined',
                'name':params['output_media']+str(index1),
                'id':params['output_media']+str(index1),
                'sourceid':params['output_media']+str(index1)
                } 
                media_data_list.append(copy.deepcopy(media_data))
            for index,media in enumerate(media_data_list):
                try:
                    workspaceClient.save_objects({'workspace': params['workspace_name'],
                                                        'objects': [{'name':media['name'],
                                                        'type':'KBaseBiochem.Media',
                                                        'data': media}]
                                                                        })
                except Exception:
                    print("\n\n ERROR TRACE: \n\n" + traceback.format_exc()+'\n\n')
                    print("KBaseBiochem.Media output object creation failure")
                    print("Media " + str(media['name']) + " Keys:\n" + str(media.keys())+'\n')

                    print("Media " + str(media['name']) + " Values:\n" +'\n')

                    for x,value in enumerate(media['mediacompounds']):
                        print("Media compound "+ str(x) + ": "+ str(media['mediacompounds'][x]) +"\n")

                    print("Other media properties: " + str(media['isMinimal']) + ' ' + str(media['isDefined']) + ' ' + str(media['type']) + ' ' + media['name'] + ' ' + media['id'])
Example #49
    def diff_p_distribution(self, ctx, args):
        # ctx is the context object
        # return variables are: result
        #BEGIN diff_p_distribution
        try:
            os.makedirs(self.RAWEXPR_DIR)
        except OSError:
            pass  # directory may already exist
        try:
            os.makedirs(self.FLTRD_DIR)
        except OSError:
            pass
        try:
            os.makedirs(self.FINAL_DIR)
        except OSError:
            pass
 
        if self.logger is None:
            self.logger = script_utils.stderrlogger(__file__)
        
        result = {}
        self.logger.info("Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV")
        token = ctx['token']
 
        eenv = os.environ.copy()
        eenv['KB_AUTH_TOKEN'] = token

        param = args

        auth_client = _KBaseAuth(self.__AUTH_SERVICE_URL)
        user_id = auth_client.get_user(token)
        workspace_name_t = Template(param['workspace_name'])
        workspace_name = workspace_name_t.substitute(user_id=user_id)
 
 
        from biokbase.workspace.client import Workspace
        ws = Workspace(url=self.__WS_URL, token=token)
        expr = ws.get_objects([{'workspace': workspace_name, 'name' : param['object_name']}])[0]['data']
 
 
        self._dumpExp2File(expr, self.RAWEXPR_DIR, self.EXPRESS_FN)
 
        self.logger.info("Identifying differentially expressed genes")
 
        ## Prepare sample file
        # detect num of columns
        ncol = len(expr['data']['col_ids'])
        
        # force the use of ANOVA if the number of samples is two
        if ncol == 3: param['method'] = 'anova'
 
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN), 'wt') as s:
          s.write("0")
          for j in range(1,ncol):
            s.write("\t{0}".format(j))
          s.write("\n")
 
 
        ## Run coex_filter
        cmd_coex_filter = [self.COEX_FILTER, '-i', "{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN), '-o', "{0}/{1}".format(self.FLTRD_DIR, self.FLTRD_FN),
           '-m', param['method'], '-n', '10', '-s', "{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN),
           '-x', "{0}/{1}".format(self.RAWEXPR_DIR, self.GENELST_FN), '-t', 'y', '-j', self.PVFDT_FN]
        if 'num_features' in param:
          cmd_coex_filter.append("-n")
          cmd_coex_filter.append(str(param['num_features']))
 
        if 'p_value' in param:
          cmd_coex_filter.append("-p")
          cmd_coex_filter.append(str(param['p_value']))
 
 
        tool_process = subprocess.Popen(cmd_coex_filter, stderr=subprocess.PIPE)
        stdout, stderr = tool_process.communicate()
        
        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)
 
        if stderr is not None and len(stderr) > 0:
            self.logger.info(stderr)
 
        ## load the p-value distribution FDT (FloatDataTable) written via coex_filter's -j option
        pvfdt = {'row_labels': [], 'column_labels': [], "data": [[]]}
        with open(self.PVFDT_FN, 'r') as myfile:
           pvfdt = json.load(myfile)
        data_obj_name = "{0}.fdt".format(param['out_figure_object_name'])
        pvfdt['id'] = data_obj_name
 
 
        fig_properties = {"xlabel" : "-log2(p-value)", "ylabel" : "Number of features", "xlog_mode" : "-log2", "ylog_mode" : "none", "title" : "Histogram of P-values", "plot_type" : "histogram"}
        sstatus = ws.save_objects({'workspace' : workspace_name, 'objects' : [{'type' : 'MAK.FloatDataTable',
                                                                              'data' : pvfdt,
                                                                              'name' : data_obj_name}]})

        data_ref = "{0}/{1}/{2}".format(sstatus[0][6], sstatus[0][0], sstatus[0][4])
        fig_properties['data_ref'] = data_ref

        sstatus = ws.save_objects({'workspace' : workspace_name, 'objects' : [{'type' : 'CoExpression.FigureProperties',
                                                                              'data' : fig_properties,
                                                                              'name' : (param['out_figure_object_name'])}]})
        result = fig_properties
        #END diff_p_distribution

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method diff_p_distribution return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
Example #50
    def filter_genes(self, ctx, args):
        # ctx is the context object
        # return variables are: result
        #BEGIN filter_genes
        try:
            os.makedirs(self.RAWEXPR_DIR)
        except OSError:
            pass  # directory may already exist
        try:
            os.makedirs(self.FLTRD_DIR)
        except OSError:
            pass
        try:
            os.makedirs(self.FINAL_DIR)
        except OSError:
            pass
 
        if self.logger is None:
            self.logger = script_utils.stderrlogger(__file__)
        
        result = {}
        self.logger.info("Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV")
        token = ctx['token']
 
        eenv = os.environ.copy()
        eenv['KB_AUTH_TOKEN'] = token

        param = args

        auth_client = _KBaseAuth(self.__AUTH_SERVICE_URL)
        user_id = auth_client.get_user(token)
        workspace_name_t = Template(param['workspace_name'])
        workspace_name = workspace_name_t.substitute(user_id=user_id)
 
        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']
        provenance[0]['input_ws_objects'] = [workspace_name + '/' + param['object_name']]
 
        from biokbase.workspace.client import Workspace
        ws = Workspace(url=self.__WS_URL, token=token)
        expr = ws.get_objects([{'workspace': workspace_name, 'name' : param['object_name']}])[0]['data']
 
        self._dumpExp2File(expr, self.RAWEXPR_DIR, self.EXPRESS_FN)
 
        self.logger.info("Identifying differentially expressed genes")
 
        ## Prepare sample file
        # detect num of columns
        ncol = len(expr['data']['col_ids'])
        
        # force the use of ANOVA if the number of samples is two
        if ncol == 3: param['method'] = 'anova'
 
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN), 'wt') as s:
          s.write("0")
          for j in range(1,ncol):
            s.write("\t{0}".format(j))
          s.write("\n")
 
 
        ## Run coex_filter
        cmd_coex_filter = [self.COEX_FILTER, '-i', "{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN), '-o', "{0}/{1}".format(self.FLTRD_DIR, self.FLTRD_FN),
                           '-m', param['method'], '-s', "{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN),
                           '-x', "{0}/{1}".format(self.RAWEXPR_DIR, self.GENELST_FN), '-t', 'y']
        if 'num_features' in param:
          cmd_coex_filter.append("-n")
          cmd_coex_filter.append(str(param['num_features']))
 
        if 'p_value' in param:
          cmd_coex_filter.append("-p")
          cmd_coex_filter.append(str(param['p_value']))
 
        if 'p_value' not in param and 'num_features' not in param:
          self.logger.error("One of p_value or num_features must be defined");
          return error_report("One of p_value or num_features must be defined", expr,self.__WS_URL, workspace_name, provenance, ws)
          #sys.exit(2) #TODO: No error handling in narrative so we do graceful termination
 
        #if 'p_value' in param and 'num_features' in param:
        #  self.logger.error("Both of p_value and num_features cannot be defined together");
        #  sys.exit(3)
 
        tool_process = subprocess.Popen(cmd_coex_filter, stderr=subprocess.PIPE)
        stdout, stderr = tool_process.communicate()
        
        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)
 
        if stderr is not None and len(stderr) > 0:
            self.logger.info(stderr)
 
        ## checking genelist
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.GENELST_FN),'r') as glh:
          gl = glh.readlines()
        gl = [x.strip('\n') for x in gl]
 
        if(len(gl) < 1) :
          self.logger.error("No genes are selected")
          return error_report("Increase p_value or specify num_features", expr,self.__WS_URL, workspace_name, provenance, ws)
          #sys.exit(4)
 
        ## Upload FVE
        if 'description' not in expr:
            expr['description'] = "Filtered Expression Matrix"
        expr['description'] += " : Filtered by '{0}' method ".format(param['method'])
 
        expr = self._subselectExp(expr, gl)
 
        ex_info = ws.save_objects({'workspace' : workspace_name, 'objects' : [{'type' : 'KBaseFeatureValues.ExpressionMatrix',
                                                                              'data' : expr,
                                                                              'name' : (param['out_expr_object_name'])}]})[0]
 
        ## Upload FeatureSet
        fs ={'elements': {}}
        fs['description'] = "FeatureSet identified by filtering method '{0}' ".format(param['method'])
 
        fs['description'] += "from {0}/{1}".format(workspace_name, param['object_name'])
 
        for g in gl:
          if 'genome_ref' in expr:
            fs['elements'][g] = [expr['genome_ref']]
          else:
            fs['elements'][g] = []
 
        fs_info = ws.save_objects({'workspace' : workspace_name, 'objects' : [{'type' : 'KBaseCollections.FeatureSet',
                                                                              'data' : fs,
                                                                              'name' : (param['out_fs_object_name'])}]})[0]

        ## Create report object:
        report = "Filtering expression matrix using {0} on {1}".format(param['method'], param['object_name'])
        reportObj = {
                        'objects_created':[{
                                'ref':"{0}/{1}/{2}".format(fs_info[6], fs_info[0], fs_info[4]),
                                'description':'Filtered FeatureSet' },
                             {
                                'ref':"{0}/{1}/{2}".format(ex_info[6], ex_info[0], ex_info[4]),
                                'description':'Filtered ExpressionMatrix'
                             }],
                        'text_message':report
                    }

        # generate a name for the Method report (note: uuid.getnode() returns
        # the host's node/MAC id, so this is stable per machine rather than
        # unique per run)
        reportName = 'FilterExpression_'+str(hex(uuid.getnode()))
        report_info = ws.save_objects({
                                        'id':ex_info[6],
                                        'objects':[
                                        {
                                        'type':'KBaseReport.Report',
                                        'data':reportObj,
                                        'name':reportName,
                                        'meta':{},
                                        'hidden':1, 
                                        'provenance':provenance
                                        }
                                        ]
                                        })[0]

        result = { "report_name" : reportName,"report_ref" : "{0}/{1}/{2}".format(report_info[6],report_info[0],report_info[4]) }



        #result = {'workspace_name' : workspace_name, 'out_expr_object_name' : param['out_expr_object_name'], 'out_fs_object_name' : param['out_fs_object_name']}
        #END filter_genes

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method filter_genes return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
Example #51
    def const_coex_net_clust(self, ctx, args):
        # ctx is the context object
        # return variables are: result
        #BEGIN const_coex_net_clust
        try:
            os.makedirs(self.RAWEXPR_DIR)
        except OSError:
            pass  # directory may already exist
        try:
            os.makedirs(self.CLSTR_DIR)
        except OSError:
            pass
        try:
            os.makedirs(self.FINAL_DIR)
        except OSError:
            pass
 
        if self.logger is None:
            self.logger = script_utils.stderrlogger(__file__)
        
        result = {}
        self.logger.info("Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV")
        token = ctx['token']

        param = args

        auth_client = _KBaseAuth(self.__AUTH_SERVICE_URL)
        user_id = auth_client.get_user(token)
        workspace_name_t = Template(param['workspace_name'])
        workspace_name = workspace_name_t.substitute(user_id=user_id)

        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']
        provenance[0]['input_ws_objects'] = [workspace_name + '/' + param['object_name']]
 
        from biokbase.workspace.client import Workspace
        ws = Workspace(url=self.__WS_URL, token=token)
        expr = ws.get_objects([{'workspace': workspace_name, 'name' : param['object_name']}])[0]['data']
 
 
        eenv = os.environ.copy()
        eenv['KB_AUTH_TOKEN'] = token
        self._dumpExp2File(expr, self.RAWEXPR_DIR, self.EXPRESS_FN)
 
        self.logger.info("Identifying differentially expressed genes")
 
        ## Prepare sample file
        # detect num of columns
        ncol = len(expr['data']['col_ids'])
        
        # grouping information 
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN), 'wt') as s:
          s.write("0")
          for j in range(1,ncol):
            s.write("\t{0}".format(j))
          s.write("\n")
 
 
        ## Run coex_cluster
        cmd_coex_cluster = [self.COEX_CLUSTER, '-t', 'y',
                            '-i', "{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN),
                            '-o', "{0}/{1}".format(self.CLSTR_DIR, self.CLSTR_FN),
                            '-m', "{0}/{1}".format(self.CLSTR_DIR, self.CSTAT_FN)]

        for p in ['net_method', 'minRsq', 'maxmediank', 'maxpower', 'clust_method', 'minModuleSize', 'detectCutHeight']:
            if p in param:
                cmd_coex_cluster.append("--{0}".format(p))
                cmd_coex_cluster.append(str(param[p]))
 
        #sys.exit(2) #TODO: No error handling in narrative so we do graceful termination
 
        #if 'p_value' in param and 'num_features' in param:
        #  self.logger.error("Both of p_value and num_features cannot be defined together");
        #  sys.exit(3)
 
        tool_process = subprocess.Popen(cmd_coex_cluster, stderr=subprocess.PIPE)
        stdout, stderr = tool_process.communicate()
        
        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)
 
        if stderr is not None and len(stderr) > 0:
            if re.search(r'^There were \d+ warnings \(use warnings\(\) to see them\)', stderr):
              self.logger.info(stderr)
            else:
              self.logger.error(stderr)
              raise Exception(stderr)
 
        
        # build index for gene list
        pos_index = {expr['data']['row_ids'][i]: i for i in range(0, len(expr['data']['row_ids']))}
 
 
        # parse clustering results
        cid2genelist = {}
        cid2stat = {}
        with open("{0}/{1}".format(self.CLSTR_DIR, self.CSTAT_FN),'r') as glh:
            glh.readline() # skip header
            for line in glh:
                cluster, mcor, msec = line.rstrip().replace('"','').split("\t")
                cid2stat[cluster]= [mcor, msec]
        with open("{0}/{1}".format(self.CLSTR_DIR, self.CLSTR_FN),'r') as glh:
            glh.readline() # skip header
            for line in glh:
                gene, cluster = line.rstrip().replace('"','').split("\t")
                if cluster not in cid2genelist:
                    cid2genelist[cluster] = []
                cid2genelist[cluster].append(gene)
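        # Both files are quoted, tab-separated tables with a one-line header:
        # CSTAT_FN rows are "<cluster>"\t"<mcor>"\t"<msec>" and CLSTR_FN rows
        # are "<gene>"\t"<cluster>" (layout inferred from the parsing above).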
 
        if len(cid2genelist) < 1:
            self.logger.error("Clustering failed")
            return error_report("Error: No cluster output", expr, self.__WS_URL, workspace_name, provenance, ws)
            #sys.exit(4)
 
        self.logger.info("Uploading the results onto WS")
        feature_clusters = []
        for cluster in cid2genelist:
            # cid2stat[cluster] holds [mcor, msec]
            feature_clusters.append({"meancor": float(cid2stat[cluster][0]),
                                     "msec": float(cid2stat[cluster][1]),
                                     "id_to_pos": {gene: pos_index[gene] for gene in cid2genelist[cluster]}})

        ## Upload Clusters
        feature_clusters ={"original_data": "{0}/{1}".format(workspace_name,param['object_name']),
                           "feature_clusters": feature_clusters}
 
        cl_info = ws.save_objects({'workspace' : workspace_name, 'objects' : [{'type' : 'KBaseFeatureValues.FeatureClusters',
                                                                          'data' : feature_clusters,
                                                                          'name' : (param['out_object_name'])}]})[0]
        ## Create report object:
	report = "Clustering expression matrix using WGCNA on {0}".format(param['object_name'])
        reportObj = {
                        'objects_created':[                             {
                                'ref':"{0}/{1}/{2}".format(cl_info[6], cl_info[0], cl_info[4]),
                                'description':'WGCNA FeatureClusters' 
                             }],
                        'text_message':report
                    }

        # generate a unique name for the Method report
        reportName = 'WGCNA_Clusters_'+str(hex(uuid.getnode()))
        report_info = ws.save_objects({
            'id': cl_info[6],
            'objects': [{
                'type': 'KBaseReport.Report',
                'data': reportObj,
                'name': reportName,
                'meta': {},
                'hidden': 1,
                'provenance': provenance
            }]
        })[0]

        result = { "report_name" : reportName,"report_ref" : "{0}/{1}/{2}".format(report_info[6],report_info[0],report_info[4]) }
        #result = {'workspace_name' : workspace_name, 'out_object_name' : param['out_object_name']}
        #result = {'workspace' : workspace_name, 'output' : param['out_object_name']}
        #END const_coex_net_clust

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method const_coex_net_clust return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
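The method reads its inputs from a plain args dict. A hypothetical example, assembled only from the keys the code above consumes (workspace_name, object_name, out_object_name, and the optional coex_cluster flags); all names and values are illustrative:

args = {
    'workspace_name': '${user_id}:coex_ws',    # expanded via string.Template with the caller's user id
    'object_name': 'my_expression_matrix',     # KBaseFeatureValues.ExpressionMatrix to cluster
    'out_object_name': 'my_feature_clusters',  # saved as KBaseFeatureValues.FeatureClusters
    # optional WGCNA tuning knobs, forwarded to coex_cluster as --<name> <value>:
    'minModuleSize': 30,
    'detectCutHeight': 0.99,
}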
Example No. 52
    def CreateRNASeqSampleSet(self, ctx, params):
        """
        :param params: instance of type "CreateRNASeqSampleSetParams"
           (FUNCTIONS used in the service) -> structure: parameter "ws_id" of
           String, parameter "sampleset_id" of String, parameter
           "sampleset_desc" of String, parameter "domain" of String,
           parameter "platform" of String, parameter "sample_ids" of list of
           String, parameter "condition" of list of String, parameter
           "source" of String, parameter "Library_type" of String, parameter
           "publication_id" of String, parameter "external_source_date" of
           String
        :returns: instance of type "RNASeqSampleSet" (Object to Describe the
           RNASeq SampleSet @optional platform num_replicates source
           publication_Id external_source_date sample_ids @metadata ws
           sampleset_id @metadata ws platform @metadata ws num_samples
           @metadata ws num_replicates @metadata ws length(condition)) ->
           structure: parameter "sampleset_id" of String, parameter
           "sampleset_desc" of String, parameter "domain" of String,
           parameter "platform" of String, parameter "num_samples" of Long,
           parameter "num_replicates" of Long, parameter "sample_ids" of list
           of String, parameter "condition" of list of String, parameter
           "source" of String, parameter "Library_type" of String, parameter
           "publication_Id" of String, parameter "external_source_date" of
           String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN CreateRNASeqSampleSet
	
        user_token = ctx['token']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        hs = HandleService(url=self.__HS_URL, token=user_token)
        try:
            ### Create the working dir for the method; change it to a function call
            # copy everything except the target workspace id into the output object
            out_obj = {k: v for k, v in params.iteritems() if k not in ('ws_id',)}
            sample_ids = params["sample_ids"]
            out_obj['num_samples'] = len(sample_ids)
            ## Validation: the set must contain more than one sample
            if len(sample_ids) < 2:
                raise ValueError("This method requires 2 or more RNA-seq samples. If you have only one read sample, run the 'Align Reads using Tophat' or 'Align Reads using Bowtie2' method directly to get an alignment")

            ## Validation: the number of samples must equal the number of conditions
            if len(params["condition"]) != out_obj['num_samples']:
                raise ValueError("Please specify a treatment label for each sample in the RNA-seq SampleSet, using the same label for the replicates of a sample type")
            ## Validation: every read object must match the declared library type
            if params["Library_type"] == 'PairedEnd':
                lib_type = 'KBaseAssembly.PairedEndLibrary'
            else:
                lib_type = 'KBaseAssembly.SingleEndLibrary'
            for i in sample_ids:
                s_info = ws_client.get_object_info_new({"objects": [{'name': i, 'workspace': params['ws_id']}]})
                obj_type = s_info[0][2].split('-')[0]
                if obj_type != lib_type:
                    raise ValueError("Library_type given: {0}. Please add only {0} typed objects in the Reads fields".format(lib_type))

            ## Code to update the provenance; make it a function later
            provenance = [{}]
            if 'provenance' in ctx:
                provenance = ctx['provenance']
            # add additional info to provenance here, in this case the input data object references
            provenance[0]['input_ws_objects'] = [params['ws_id'] + '/' + sample for sample in sample_ids]

            # Saving RNASeqSampleSet to the workspace
            self.__LOGGER.info("Saving {0} object to workspace".format(params['sampleset_id']))
            res = ws_client.save_objects(
                {"workspace": params['ws_id'],
                 "objects": [{"type": "KBaseRNASeq.RNASeqSampleSet",
                              "data": out_obj,
                              "name": out_obj['sampleset_id'],
                              "provenance": provenance}]
                 })
            returnVal = out_obj
        except Exception, e:
            raise KBaseRNASeqException("Error saving the object to workspace {0},{1}".format(out_obj['sampleset_id'], "".join(traceback.format_exc())))
        #END CreateRNASeqSampleSet

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method CreateRNASeqSampleSet return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
Example No. 53
    def const_coex_net_clust(self, ctx, args):
        # ctx is the context object
        # return variables are: result
        #BEGIN const_coex_net_clust
        try:
            os.makedirs(self.RAWEXPR_DIR)
        except:
            pass
        try:
            os.makedirs(self.CLSTR_DIR)
        except:
            pass
        try:
            os.makedirs(self.FINAL_DIR)
        except:
            pass

        if self.logger is None:
            self.logger = script_utils.stderrlogger(__file__)

        result = {}
        self.logger.info(
            "Starting conversion of KBaseFeatureValues.ExpressionMatrix to TSV"
        )
        token = ctx['token']

        param = args

        from biokbase.workspace.client import Workspace
        ws = Workspace(url=self.__WS_URL, token=token)
        expr = ws.get_objects([{
            'workspace': param['workspace_name'],
            'name': param['object_name']
        }])[0]['data']

        eenv = os.environ.copy()
        eenv['KB_AUTH_TOKEN'] = token
        cmd_dowload_cvt_tsv = [
            self.FVE_2_TSV, '--workspace_service_url', self.__WS_URL,
            '--workspace_name', param['workspace_name'], '--object_name',
            param['object_name'], '--working_directory', self.RAWEXPR_DIR,
            '--output_file_name', self.EXPRESS_FN
        ]

        # need shell in this case because the java code is depending on finding the KBase token in the environment
        #  -- copied from FVE_2_TSV
        tool_process = subprocess.Popen(" ".join(cmd_dowload_cvt_tsv),
                                        stderr=subprocess.PIPE,
                                        shell=True,
                                        env=eenv)
        stdout, stderr = tool_process.communicate()

        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)

        if stderr is not None and len(stderr) > 0:
            self.logger.info(stderr)
            #raise Exception(stderr)

        self.logger.info("Coexpression clustering analysis")

        ## Prepare sample file
        # detect num of columns
        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN),
                  'r') as f:
            fl = f.readline()
        ncol = len(fl.split('\t'))
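        # ncol counts the header fields of the TSV, which include the leading
        # row-id column, so there are ncol - 1 data columns (hence the
        # range(1, ncol - 1) below, writing group labels 0 .. ncol-2).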

        with open("{0}/{1}".format(self.RAWEXPR_DIR, self.SAMPLE_FN),
                  'wt') as s:
            s.write("0")
            for j in range(1, ncol - 1):
                s.write("\t{0}".format(j))
            s.write("\n")

        ## Run coex_cluster
        cmd_coex_cluster = [
            self.COEX_CLUSTER, '-t', 'y', '-i',
            "{0}/{1}".format(self.RAWEXPR_DIR, self.EXPRESS_FN), '-o',
            "{0}/{1}".format(self.CLSTR_DIR, self.CLSTR_FN)
        ]

        for p in [
                'net_method', 'minRsq', 'maxmediank', 'maxpower',
                'clust_method', 'minModuleSize', 'detectCutHeight'
        ]:
            if p in param:
                cmd_coex_cluster.append("--{0}".format(p))
                cmd_coex_cluster.append(str(param[p]))

        #sys.exit(2) #TODO: No error handling in narrative so we do graceful termination

        #if 'p_value' in param and 'num_features' in param:
        #  self.logger.error("Both of p_value and num_features cannot be defined together");
        #  sys.exit(3)

        tool_process = subprocess.Popen(cmd_coex_cluster,
                                        stderr=subprocess.PIPE)
        stdout, stderr = tool_process.communicate()

        if stdout is not None and len(stdout) > 0:
            self.logger.info(stdout)

        if stderr is not None and len(stderr) > 0:
            if re.search(
                    r'^There were \d+ warnings \(use warnings\(\) to see them\)',
                    stderr):
                self.logger.info(stderr)
            else:
                self.logger.error(stderr)
                raise Exception(stderr)

        # build index for gene list
        pos_index = {
            expr['data']['row_ids'][i]: i
            for i in range(0, len(expr['data']['row_ids']))
        }

        # parse clustering results
        cid2genelist = {}
        with open("{0}/{1}".format(self.CLSTR_DIR, self.CLSTR_FN), 'r') as glh:
            glh.readline()  # skip header
            for line in glh:
                gene, cluster = line.rstrip().replace('"', '').split("\t")
                if cluster not in cid2genelist:
                    cid2genelist[cluster] = []
                cid2genelist[cluster].append(gene)

        if (len(cid2genelist) < 1):
            self.logger.error("Clustering failed")
            return empty_results("Error: No cluster output", expr,
                                 self.__WS_URL, param, self.logger, ws)
            #sys.exit(4)

        self.logger.info("Uploading the results onto WS")
        feature_clusters = []
        for cluster in cid2genelist:
            feature_clusters.append({
                "id_to_pos":
                {gene: pos_index[gene]
                 for gene in cid2genelist[cluster]}
            })

        ## Upload Clusters
        feature_clusters = {
            "original_data":
            "{0}/{1}".format(param['workspace_name'], param['object_name']),
            "feature_clusters":
            feature_clusters
        }

        ws.save_objects({
            'workspace':
            param['workspace_name'],
            'objects': [{
                'type': 'KBaseFeatureValues.FeatureClusters',
                'data': feature_clusters,
                'name': (param['out_object_name'])
            }]
        })
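        # NOTE: the object_info returned by save_objects is discarded here;
        # the report-generating variant above keeps it to build a report ref.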
        result = {
            'workspace_name': param['workspace_name'],
            'out_object_name': param['out_object_name']
        }
        #END const_coex_net_clust

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method const_coex_net_clust return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]