def install(self, pkg, require=None, update=True, reinstall=False):
    """Install one package, or each package of a list, via :meth:`_install`.

    According to the original documentation, packages are looked up
    automatically in the CRAN and biocLite repositories, and if *require*
    is not set and *update* is True, a newer version of a package is
    installed when one is available.

    :param pkg: a package name or a list of package names. It is converted
        into a list with :func:`easydev.to_list`.
    :param require: forwarded unchanged to :meth:`_install`.
    :param bool update: forwarded unchanged to :meth:`_install`.
    :param bool reinstall: forwarded unchanged to :meth:`_install`.
    """
    from easydev import to_list

    # One _install call per package, all sharing the same options.
    for name in to_list(pkg):
        self._install(name, require=require, update=update,
                      reinstall=reinstall)
def keep_tissue_in(self, tissues):
    """Keep only the rows whose tissue belongs to *tissues*.

    :param list tissues: a list of tissues to keep. If you have only one
        tissue, it can be provided as a string.

    The dataframe :attr:`df` is replaced by its filtered version and
    :meth:`_cleanup` is then called. As stated in the original
    documentation, since rows are removed some features (columns) may
    become empty (all zeros); those columns are dropped, except for the
    special columns (e.g., MSI).
    """
    wanted = easydev.to_list(tissues)
    # Boolean selection built from the tissue column of the dataframe.
    tissue_column = self.df[self.colnames.tissue]
    self.df = self.df[tissue_column.isin(wanted)]
    self._cleanup()
def get_interactions(self, query="", frmt='json', fields=None):
    """Interactions of proteins

    :param query: a valid uniprot identifier (e.g. P00533). It can also be
        a list (or tuple) of uniprot identifiers, or a string with
        comma-separated identifiers. Spaces are removed from the final
        query string.
    :param str frmt: format of the output (``json`` or ``tsv``)
    :param fields: additional fields to be added to the output
        (e.g., sources, references), as a string or a list of strings.
        Defaults to None (no additional field).
    :return: the value returned by :meth:`http_get`. If frmt is set to
        tsv, the output is a TSV table with a header. If set to json, a
        dictionary is returned.
    :raises AssertionError: if *frmt* is neither ``json`` nor ``tsv``.

    Example::

        res_one = o.get_interactions('P00533')
        res_many = o.get_interactions('P00533,O15117,Q96FE5')
        res_many = o.get_interactions(['P00533','O15117','Q96FE5'])
        res_one = o.get_interactions('P00533', fields='sources')
        res_one = o.get_interactions('P00533', fields=['source'])
        res_one = o.get_interactions('P00533', fields=['source', 'references'])

    You may also keep query to an empty string, but the entire DB will
    then be downloaded. This may take time and the :attr:`timeout` may
    need to be increased manually.
    """
    # Validate first so that a wrong format fails before any other work.
    # The assert is kept (rather than raising ValueError) so that callers
    # catching AssertionError keep working.
    assert frmt in ['json', 'tsv'], "frmt must be set to json or tsv"

    # A sequence of identifiers becomes a comma-separated string.
    if isinstance(query, (list, tuple)):
        query = ",".join(query)

    # make sure there is no spaces (also applies to joined sequences)
    try:
        query = query.replace(' ', '')
    except (AttributeError, TypeError):
        # Best effort: the input is not a string, use it as provided.
        pass

    params = {'format': frmt}

    from easydev import to_list
    # None replaces the former mutable default ([]), which was shared
    # between calls and handed over to http_get through params.
    fields = [] if fields is None else to_list(fields)
    if len(fields):
        params['fields'] = fields  # TODO handle multiple fields

    res = self.http_get(self.url + "interactions/%s" % query,
                        frmt=frmt, params=params)
    return res
def get_interactions(self, query="", frmt='json', fields=None):
    """Interactions of proteins

    :param query: a valid uniprot identifier (e.g. P00533). It can also be
        a list (or tuple) of uniprot identifiers, or a string with
        comma-separated identifiers. Spaces are removed from the final
        query string.
    :param str frmt: format of the output (``json`` or ``tsv``)
    :param fields: additional fields to be added to the output
        (e.g., sources, references), as a string or a list of strings.
        Defaults to None (no additional field).
    :return: the value returned by :meth:`http_get`. If frmt is set to
        tsv, the output is a TSV table with a header. If set to json, a
        dictionary is returned.
    :raises AssertionError: if *frmt* is neither ``json`` nor ``tsv``.

    Example::

        res_one = o.get_interactions('P00533')
        res_many = o.get_interactions('P00533,O15117,Q96FE5')
        res_many = o.get_interactions(['P00533','O15117','Q96FE5'])
        res_one = o.get_interactions('P00533', fields='sources')
        res_one = o.get_interactions('P00533', fields=['source'])
        res_one = o.get_interactions('P00533', fields=['source', 'references'])

    You may also keep query to an empty string, but the entire DB will
    then be downloaded. This may take time and the :attr:`timeout` may
    need to be increased manually.
    """
    # Validate first so that a wrong format fails before any other work.
    # The assert is kept (rather than raising ValueError) so that callers
    # catching AssertionError keep working.
    assert frmt in ['json', 'tsv'], "frmt must be set to json or tsv"

    # A sequence of identifiers becomes a comma-separated string.
    if isinstance(query, (list, tuple)):
        query = ",".join(query)

    # make sure there is no spaces (also applies to joined sequences)
    try:
        query = query.replace(' ', '')
    except (AttributeError, TypeError):
        # Best effort: the input is not a string, use it as provided.
        pass

    params = {'format': frmt}

    from easydev import to_list
    # None replaces the former mutable default ([]), which was shared
    # between calls and handed over to http_get through params.
    fields = [] if fields is None else to_list(fields)
    if len(fields):
        params['fields'] = fields  # TODO handle multiple fields

    res = self.http_get(self.url + "interactions/%s" % query,
                        frmt=frmt, params=params)
    return res
def drop_drugs(self, drugs):
    """Remove a drug, or a list of drugs, from :attr:`drugIds`.

    :param drugs: a single drug identifier or a list of identifiers. It is
        converted into a list with :func:`easydev.to_list`.

    :attr:`drugIds` is reassigned to the identifiers that are not listed
    in *drugs*, in their original order.
    """
    unwanted = easydev.to_list(drugs)
    self.drugIds = [drug_id for drug_id in self.drugIds
                    if drug_id not in unwanted]
def drop_cosmic(self, cosmics):
    """Remove a COSMIC id, or a list of COSMIC ids, from :attr:`cosmicIds`.

    :param cosmics: a single COSMIC identifier or a list of identifiers.
        It is converted into a list with :func:`easydev.to_list`.

    :attr:`cosmicIds` is reassigned to the identifiers that are not listed
    in *cosmics*, in their original order.
    """
    unwanted = easydev.to_list(cosmics)
    self.cosmicIds = [cosmic_id for cosmic_id in self.cosmicIds
                      if cosmic_id not in unwanted]
def optimise(self, NAFac=1, pmutation=0.5, selpress=1.2, popsize=50,
             reltol=0.1, elitism=5, maxtime=60, sizefactor=0.0001,
             time_index_1=1, maxgens=500, maxstallgens=100, ga_verbose=True):
    """Perform the optimisation and save results

    * Results are stored in :attr:`results`
    * Models with the tolerance are stored in :attr:`results.models`

    Parameters are those of a Genetic Algorithm used to perform
    the analysis. They are not read directly in this method: the GA
    section of the configuration is updated from
    ``self.optimise.actual_kwargs`` and then injected into the R call to
    ``gaBinaryT1`` (popSize, maxGens, maxTime, elitism, pMutation, NAFac,
    selPress, relTol, sizeFac, stallGenMax).

    If you run again, it uses the previous best bitstring.
    Set self.session.best_bitstring = None to start from the
    full network.
    """
    self.logging.info("Running the optimisation. Can take a very long "
                      "time. To see the progression, set verboseR "
                      "attribute to True")
    # update config GA section with user parameters
    # NOTE(review): actual_kwargs is presumably attached by a decorator
    # that is not visible in this block -- confirm.
    self._update_config('GA', self.optimise.actual_kwargs)

    # keep track of the GA parameters, which may have been update above
    gad = self.config.GA.as_dict()

    # Initial bitstring passed to R: a c(...) vector built from the
    # previous best bitstring if any, NULL otherwise.
    bs = self.session.get('best_bitstring')
    if bs is not None:
        bs = "c(" + ",".join([str(x) for x in list(bs)]) + ")"
    else:
        bs = 'NULL'

    # do we want to pre process the data ?

    script_template = """
    library(CellNOptR)
    pknmodel = readSIF("%(pkn)s")
    cnolist = CNOlist("%(midas)s")
    model = preprocessing(cnolist, pknmodel, compression=%(compression)s,
        expansion=%(expansion)s, cutNONC=%(cutnonc)s,
        maxInputsPerGate=%(maxInputsPerGate)s)

    res = gaBinaryT1(cnolist, model, popSize=%(popsize)s, maxGens=%(maxgens)s,
        maxTime=%(maxtime)s, elitism=%(elitism)s, pMutation=%(pmutation)s,
        NAFac=%(NAFac)s, selPress=%(selpress)s, relTol=%(reltol)s,
        sizeFac=%(sizefactor)s, stallGenMax=%(maxstallgens)s,
        initBstring=%(bs)s)

    sim_results = cutAndPlot(cnolist, model, list(res$bString),
        plotParams=list(maxrow = 80, cex=0.5), plotPDF=F)
    sim_results2 = NULL

    # output are not the same... as in T1
    signals = colnames(cnolist@signals$`0`)
    colnames(sim_results$mse) = signals
    for (i in seq_along(sim_results$simResults[[1]])){
        colnames(sim_results$simResults[[1]][[i]]) = signals
    }

    # to be retrieved inside Python code
    best_bitstring = res$bString
    best_score = res$bScore
    all_scores = res$stringsTolScores
    all_bitstrings = res$stringsTol
    reactions = model$reacID
    results = as.data.frame(res$results)
    stimuli = as.data.frame(cnolist@stimuli)
    inhibitors = as.data.frame(cnolist@inhibitors)
    species = colnames(cnolist@signals[[1]])
    optim1 = T
    """

    params = {
        'pkn': self.pknmodel.filename,
        'midas': self.data.filename,
        'compression': bool2R(self._compression),
        'expansion': bool2R(self._expansion),
        'cutnonc': bool2R(self._cutnonc),
        'maxInputsPerGate': self._max_inputs_per_gate,
        'bs': bs,
    }
    # The GA placeholders of the template (popsize, maxgens, ...) are
    # expected to come from the GA configuration section.
    params.update(gad)

    script = script_template % params
    self.session.run(script)
    self.reactions_r = self.session.reactions

    # need to change type of some columns, which are all string
    results = self.session.results
    results.columns = [x.strip() for x in results.columns]

    columns_int = ['Generation', 'Stall_Generation']
    columns_float = ['Best_score', 'Avg_Score_Gen', 'Best_score_Gen',
                     'Iter_time']
    results[columns_int] = results[columns_int].astype(int)
    results[columns_float] = results[columns_float].astype(float)

    # cnograph created automatically from the reactions
    try:
        # N is not used on this path; the call is kept so that a failure
        # here still triggers the fallback below, as before.
        N = len(self.session.best_bitstring)
        all_bs = self.session.all_bitstrings
        df = pd.DataFrame(all_bs, columns=self.reactions_r)
        models = BooleanModels(df)
        # flatten to handle exhaustive
        import numpy as np
        models.scores = np.array(list(pylab.flatten(self.session.all_scores)))
        # NOTE(review): unlike the fallback below, this path does not set
        # self._models -- confirm whether that is intended.
        try:
            models.cnograph.midas = self.data.copy()
        except Exception as err:
            # does not work with ExtLiverPCB
            # CNOError: 'The cues IFNg was found in the MIDAS file but is
            # not present in the model. Change your model or MIDAS file.'
            print("something wrong in the copying of the midas into cnograph(models)")
            # Exceptions have no .message attribute in Python 3; printing
            # the exception itself works everywhere and avoids raising a
            # new AttributeError from within this handler (which used to
            # send the code into the fallback path below).
            print(err)
    except Exception:
        # Fallback when the models could not be built from the set of
        # bitstrings directly: try a single bitstring, else no model.
        N = len(self.session.best_bitstring)
        all_bs = self.session.all_bitstrings
        if N == len(self.reactions_r) and len(all_bs) > 0:
            df = pd.DataFrame([all_bs], columns=self.reactions_r)
        else:
            df = pd.DataFrame(columns=self.reactions_r)
        models = BooleanModels(df)
        models.scores = easydev.to_list(self.session.all_scores)
        self._models = models

        # NOTE(review): placed inside the fallback because the main path
        # already performs (and guards) this copy -- confirm.
        models.cnograph.midas = self.data.copy()

    results = {
        'best_score': self.session.best_score,
        'best_bitstring': self.session.best_bitstring,
        'all_scores': self.session.all_scores,
        'all_bitstrings': self.session.all_bitstrings,
        'reactions': self._reac_cnor2cno(self.reactions_r),
        'sim_results': self.session.sim_results,  # contains mse and sim at t0,t1,
        'results': results,
        'models': models,
        'stimuli': self.session.stimuli.copy(),
        'inhibitors': self.session.inhibitors.copy(),
        'species': self.session.species,
    }

    results['pkn'] = self.pknmodel
    results['midas'] = self.data

    self.results.results = results
    self.results.models = models

    self._called.append('optimise')