def explanation(self, result):
    """Build the display items for a GNBR treatment lookup.

    Args:
        result: Two-item sequence ``(results, disease)``. ``results`` is the
            row data passed to ``IrisDataframe`` under the columns
            "Phenotype", "Mesh ID" and "Frequency of Annotation";
            ``disease`` is interpolated into the heading.

    Returns:
        A list holding either a single "nothing found" message, or a
        heading string followed by the ``IrisDataframe`` of treatments.
    """
    results, disease = result

    # Explicit length check rather than truthiness: not every container
    # defines a boolean value (e.g. a pandas DataFrame) -- TODO confirm the
    # concrete type of ``results``.
    if len(results) == 0:
        return ['No treatments identified in GNBR']

    treatment_data = iris_objects.IrisDataframe(
        column_names=["Phenotype", "Mesh ID", "Frequency of Annotation"],
        column_types=["Text", "Text", "Text"],
        data=results)
    return [
        'The following treatments were identified in GNBR for %s' % disease,
        treatment_data,
    ]
def next_state_base(self, next):
    """Package the parsed file as an ``IrisDataframe`` and pass it on.

    The frame is built positionally from the loaded file's ``name`` and the
    ``headers``, ``types`` and ``data`` entries of ``self.context``.

    Args:
        next: Not used by this method.

    Returns:
        An ``sm.ValueState`` wrapping the frame, chained via ``when_done``
        to this state's when-done state.
    """
    loaded_name = self.read_variable("loaded_file").name
    headers = self.context["headers"]
    types = self.context["types"]
    rows = self.context["data"]

    frame = iris_objects.IrisDataframe(loaded_name, headers, types, rows)
    value_state = sm.ValueState(frame)
    return value_state.when_done(self.get_when_done_state())
def command(self, array):
    """Compute the median of ``array`` along axis 0.

    Args:
        array: Object exposing ``column_names``, ``column_types`` and
            ``to_matrix()``.

    Returns:
        A one-row ``IrisDataframe`` that reuses the column names and types
        of ``array``; its row is ``np.median(array.to_matrix(), axis=0)``.
    """
    import numpy as np

    medians = np.median(array.to_matrix(), axis=0)
    return iris_objects.IrisDataframe(
        column_names=array.column_names,
        column_types=array.column_types,
        data=[medians])
def get_output(self):
    """Prompt for column headers, showing one line of sample data.

    The sample is the first line of the loaded file, or the second when the
    ``throw_away`` variable is truthy. It is displayed in a one-row frame
    with placeholder column names ("column 0", "column 1", ...) and "_"
    as every column type.

    Returns:
        A two-item list: the prompt string and a ``collection`` payload
        carrying the frame's spreadsheet data.
    """
    skip = 1 if self.read_variable("throw_away") else 0
    file_lines = self.read_variable("loaded_file").content.split("\n")
    sample_data = split_line(file_lines[skip])

    placeholder_names = [
        "column {}".format(position) for position, _ in enumerate(sample_data)
    ]
    placeholder_types = ["_" for _ in sample_data]
    dummy_frame = iris_objects.IrisDataframe(
        column_names=placeholder_names,
        column_types=placeholder_types,
        data=[sample_data])

    prompt = "What are the headers? Please enter a list of comma-separated values. I've provided a line of sample data below."
    payload = {
        "type": "collection",
        "value": dummy_frame.generate_spreadsheet_data(),
    }
    return [prompt, payload]
def next_state_base(self, text):
    """Show the column types derived from the first data line.

    ``types`` is obtained by passing the first entry of
    ``self.context['data']`` through ``split_line`` and ``rows_and_types``.
    Unless ``self.force_check`` is set, it is also stored in
    ``self.context["types"]``.

    Args:
        text: User response, checked with ``util.verify_response`` when
            ``self.force_check`` is falsy.

    Returns:
        ``None`` when neither ``force_check`` nor the response check
        passes; otherwise an ``sm.DoAll`` that prints a
        ``collection_select_one`` payload and then runs ``ChangeIndex``,
        chained to this state's when-done state.
    """
    first_line = self.context['data'][0]
    types = rows_and_types(split_line(first_line))
    if not self.force_check:
        self.context["types"] = types

    if not (self.force_check or util.verify_response(text)):
        return None

    print(types)
    preview = iris_objects.IrisDataframe(
        column_names=self.context['headers'],
        column_types=["String" for _ in types],
        data=[types])
    show_types = sm.Print([{
        "type": "collection_select_one",
        "value": preview.generate_spreadsheet_data(),
    }])
    sequence = sm.DoAll([show_types, ChangeIndex()])
    return sequence.when_done(self.get_when_done_state())
def get_output(self):
    """Show the headers read from the file and ask for confirmation.

    The header line is the first line of the loaded file, or the second
    when the ``throw_away`` variable is truthy; its fields are lower-cased.
    Up to three following lines are shown as sample rows.

    Returns:
        A two-item list: the question string and a ``collection`` payload
        with the preview frame's spreadsheet data.
    """
    content = self.read_variable("loaded_file").content
    header_idx = 1 if self.read_variable("throw_away") else 0
    file_lines = content.split("\n")

    headers = [field.lower() for field in split_line(file_lines[header_idx])]
    data_sample = [
        list(split_line(row))
        for row in file_lines[header_idx + 1:header_idx + 4]
    ]
    # The result is not used; the call is kept so behaviour is unchanged.
    util.prettify_data(headers)

    # The header names double as the column types of the preview frame.
    preview = iris_objects.IrisDataframe(
        column_names=headers, column_types=headers, data=data_sample)

    question = "Here are the headers I inferred from the first line. Do these look good?"
    payload = {
        "type": "collection",
        "value": preview.generate_spreadsheet_data(),
    }
    return [question, payload]
def next_state_base(self, text):
    """Generate placeholder headers and preview the first data rows.

    Stores two entries in ``self.context``:

    * ``headers`` -- ``column0``, ``column1``, ... one per field of the
      first retained line.
    * ``data`` -- the file's lines, minus the first one when the
      ``throw_away`` variable is truthy.

    Args:
        text: Not used by this method.

    Returns:
        An ``sm.Print`` of a ``collection`` payload showing up to three
        data rows, chained to this state's when-done state.
    """
    lines = self.read_variable("loaded_file").content.split("\n")
    start_from = 1 if self.read_variable("throw_away") else 0
    data_lines = lines[start_from:]

    # Count columns on the first line that is actually kept; a thrown-away
    # first line need not have the same number of fields as the data. Fall
    # back to line 0 when nothing is left after the skip.
    reference_line = data_lines[0] if data_lines else lines[0]
    num_cols = len(split_line(reference_line))
    headers = ["column{}".format(i) for i in range(num_cols)]

    self.context['headers'] = headers
    self.context['data'] = data_lines

    # ``data_lines`` is already trimmed and contains no header row, so the
    # preview starts at its first element. Re-applying the offset here would
    # silently skip leading data rows.
    data_sample = [list(split_line(line)) for line in data_lines[:3]]

    # The header names double as the column types of the preview frame.
    dummy_frame = iris_objects.IrisDataframe(
        column_names=headers, column_types=headers, data=data_sample)
    payload = {
        "type": "collection",
        "value": dummy_frame.generate_spreadsheet_data(),
    }
    return sm.Print([payload]).when_done(self.get_when_done_state())
def command(self, documents):
    """Score documents against Empath categories, highest score first.

    Args:
        documents: Object whose ``to_matrix().flatten().tolist()`` yields
            the documents handed to ``Empath.analyze``.

    Returns:
        An ``IrisDataframe`` with columns ``category`` (String) and
        ``normalized_count`` (Number), sorted by descending count.
    """
    from empath import Empath

    texts = documents.to_matrix().flatten().tolist()

    # NOTE(review): with normalize=True, Empath appears to return None when
    # the input contains no tokens -- confirm against the installed version.
    # Treat that as "no categories" instead of failing on ``None.items()``.
    scores = Empath().analyze(texts, normalize=True) or {}

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return iris_objects.IrisDataframe(
        column_names=["category", "normalized_count"],
        column_types=["String", "Number"],
        data=[[category, score] for category, score in ranked])
def test_dataframe(self):
    """Exercise ``IrisDataframe`` construction, column/row addition and export.

    Builds a frame from two records, derives frames with one and with two
    extra columns, exports spreadsheet data, adds a row to the original
    frame and prints the results. Nothing is asserted.
    """
    records = [
        {"firstname": "Ethan", "lastname": "Fast"},
        {"firstname": "Binbin", "lastname": "Chen"},
    ]
    dataframe = iris_objects.IrisDataframe(records)

    with_age = dataframe.add_column("age", [27, 27])
    with_age_and_job = dataframe.add_columns(
        ["age", "occupation"],
        [[27, 27], ["CS PhD Student", "Bioinformatics PhD Student"]])

    # The exported data is discarded; only the call itself is exercised.
    with_age_and_job.generate_spreadsheet_data()
    dataframe.add_rows([""])

    print(dataframe)
    print(with_age.df["age"])
    print(with_age_and_job.df["occupation"])
def command(self, file, bool_image, bool_pubmed, bool_other_disease,
            bool_display):
    """Run the Q2 query for every drug/disease pair in a tab-separated file.

    The file is read with pandas and registered in the Iris environment as
    ``drug_disease_list``. The first column supplies the drugs and the
    second the diseases. Results go to a timestamped directory under
    ``results_dir``.

    Args:
        file: Object whose ``path`` points at the tab-separated input.
        bool_image: Forwarded to ``Options`` as ``gen_image``.
        bool_pubmed: Forwarded to ``Options`` as ``gen_pubmed``.
        bool_other_disease: When truthy, each answer also gets an
            ``other_disease`` entry from ``run_main.find_drug_indications``.
        bool_display: Selects the return form, see below.

    Returns:
        When ``bool_display`` is truthy, a list whose first element is the
        task directory followed by one answer dict per pair. A string
        answer from ``Q2_query`` is wrapped as ``{'error': <string>}``.
        Otherwise a string naming the task directory.
    """
    import pandas as pd

    panda_df = pd.read_csv(file.path, sep='\t')
    self.iris.add_to_env('drug_disease_list',
                         iris_objects.IrisDataframe(data=panda_df))

    task_dir = os.path.join(
        results_dir, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    options = Options(
        gen_image=bool_image, gen_pubmed=bool_pubmed, outPath=task_dir)

    # ``DataFrame.ix`` was removed in pandas 1.0; ``iloc`` is the purely
    # positional selector, which is what "first/second column" needs here.
    drug_list = list(panda_df.iloc[:, 0])
    disease_list = list(panda_df.iloc[:, 1])

    # The task directory travels as element 0 of the answer list.
    answer_arr = [task_dir]
    for drug, disease in zip(drug_list, disease_list):
        answer = Q2_query(drug, disease, options)
        if isinstance(answer, str):
            answer = {'error': answer}
        if bool_other_disease:
            answer["other_disease"] = run_main.find_drug_indications(drug)
        answer['drug'] = drug.strip()
        answer['disease'] = disease.strip()
        answer_arr.append(answer)

    if bool_display:
        return answer_arr
    return ' '.join(("written results to:", task_dir))
def explanation(self, result):
    """Build the display items for a similarity query.

    Args:
        result: Two-item sequence ``(condition, results)``. ``results``
            exposes an ``error`` attribute and a ``top_similarities()``
            method.

    Returns:
        A one-item list: an error message naming the condition, a
        "could not be computed" message, or the ``IrisDataframe`` of
        similarities. In the last case the frame is also registered in the
        Iris environment.
    """
    condition, results = result

    # Environment name: prefix plus the first five characters of the
    # condition, with spaces removed and lower-cased.
    df_name = ('similarities_' + condition[:5]).replace(" ", "").lower()

    if results.error is not None:
        return ['There was an error processing your request for %s' % condition]

    similarities = results.top_similarities()
    if similarities is None:
        return ['No similarities could be computed']

    similarities_df = iris_objects.IrisDataframe(data=similarities)
    self.iris.add_to_env(df_name, similarities_df)
    return [similarities_df]
def explanation(self, result):
    """Build the display items for a genetic-disease similarity query.

    Args:
        result: Two-item sequence ``(results, condition)``. ``results``
            exposes an ``error`` attribute and a ``top_similarities()``
            method.

    Returns:
        A list: a single error message, a single "could not be computed"
        message, or a heading followed by the ``IrisDataframe`` of
        similarities. In the last case the frame is also registered in the
        Iris environment.
    """
    results, condition = result

    # Environment name: prefix plus the first five characters of the
    # condition, with spaces removed and lower-cased.
    df_name = ('similarities_' + condition[:5]).replace(" ", "").lower()

    if results.error is not None:
        return ['There was an error processing your request']

    similarities = results.top_similarities()
    if similarities is None:
        return ['No similarities could be computed']

    similarities_df = iris_objects.IrisDataframe(data=similarities)
    self.iris.add_to_env(df_name, similarities_df)
    return [
        'The following genetic diseases are the most semantically similar to your query',
        similarities_df,
    ]
def explanation(self, result):
    """Turn one drug-disease query result into a list of display items.

    Full-size tables are registered in the Iris environment under names
    ending in a suffix built from the first three characters of the drug
    and of the disease. Their ``*_short`` counterparts are placed in the
    returned list.

    Args:
        result: Dict describing the query outcome. If it contains
            ``'error'`` that value is returned as-is. Otherwise it must
            provide ``drug``, ``disease``, ``drug_id``, ``disease_id``,
            ``drug_genes``, ``drug_genes_short``, ``disease_genes`` and
            ``disease_genes_short``. ``GOENRICH``/``GOENRICH_short``,
            ``tissue_df_dis``/``tissue_df_dis_short``,
            ``pubmed``/``pubmed_short``, ``other_disease`` and
            ``image_file`` are used when present.

    Returns:
        The error value when ``'error'`` is present, otherwise a list
        mixing message strings and ``IrisDataframe`` objects.
    """
    # Components of result are in dictionary form
    if "error" in result:
        return result['error']

    drug = result['drug']
    disease = result['disease']

    # Suffix shared by every variable registered for this pair; spaces
    # become underscores.
    query_name = drug[:3] + "_" + disease[:3]
    query_name = "_" + query_name.replace(' ', '_').lower()
    print(query_name, "== query name")

    query_statement = ('How does ' + drug + '(' + result['drug_id'] +
                       ') treat ' + disease + '(' + result['disease_id'] +
                       ').')
    result_array = ['Here are your results for: %s' % query_statement]

    # Genes associated with the drug
    result_array.append(
        'Top genes found to be targetted by %s are below. Full dataset saved as drug_genes_{drug_disease}'
        % drug)
    drug_gene_term_object = iris_objects.IrisDataframe(
        data=result['drug_genes'])
    self.iris.add_to_env('drug_genes' + query_name, drug_gene_term_object)
    result_array.append(
        iris_objects.IrisDataframe(data=result['drug_genes_short']))

    # Genes associated with the disease
    result_array.append(
        'Top genes found to be associated with %s are below. Full dataset saved as disease_genes_{drug_disease}'
        % disease)
    disease_gene_term_object = iris_objects.IrisDataframe(
        data=result['disease_genes'])
    self.iris.add_to_env('disease_genes' + query_name,
                         disease_gene_term_object)
    result_array.append(
        iris_objects.IrisDataframe(data=result['disease_genes_short']))

    # Significant GO terms. Both frames are built before anything is
    # reported, so the "shown" and "not found" messages can never both
    # appear. This stays best effort: any failure to build the tables is
    # reported as "not found", but KeyboardInterrupt/SystemExit are no
    # longer swallowed.
    try:
        go_term_object = iris_objects.IrisDataframe(data=result['GOENRICH'])
        go_term_object_short = iris_objects.IrisDataframe(
            data=result['GOENRICH_short'])
    except Exception:
        result_array.append('No significant GO terms found')
    else:
        result_array.append(
            'Top significant GO terms associated with the drug-disease interaction are shown. Full dataset saved as go_terms_{drug_disease}'
        )
        self.iris.add_to_env('go_terms' + query_name, go_term_object)
        result_array.append(go_term_object_short)

    # Tissues in which disease genes are differentially expressed
    if 'tissue_df_dis' in result:
        # The message names the variable exactly as it is registered below.
        result_array.append(
            'The most relevant tissues, in which disease genes are differentially expressed, are shown. Full dataset saved as tissues_disease_{drug_disease} '
        )
        tissue_object_dis = iris_objects.IrisDataframe(
            data=result['tissue_df_dis'])
        tissue_object_dis_short = iris_objects.IrisDataframe(
            data=result['tissue_df_dis_short'])
        self.iris.add_to_env('tissues_disease' + query_name,
                             tissue_object_dis)
        result_array.append(tissue_object_dis_short)
    else:
        result_array.append(
            'No differential tissue expression in disease state detected.')

    # Supporting PubMed IDs; a string value is passed through as a message.
    if "pubmed" in result:
        if isinstance(result["pubmed"], str):
            result_array.append(result["pubmed"])
        else:
            result_array.append(
                "Following are PMIDs that support the interaction: Full dataset saved as pmid_{drug_disease}."
            )
            pmid_df_short = iris_objects.IrisDataframe(
                data=result["pubmed_short"])
            pmid_df = iris_objects.IrisDataframe(data=result["pubmed"])
            self.iris.add_to_env('pmid' + query_name, pmid_df)
            result_array.append(pmid_df_short)

    # Other diseases possibly affected by the drug
    if "other_disease" in result:
        ph_genes_str, _ = result["other_disease"]
        ph_genes_arr = ph_genes_str.split('\t')
        if len(ph_genes_arr) >= 4:
            # The flat tab-separated list is regrouped into rows of four.
            ph_genes_array_all = [
                ph_genes_arr[x:x + 4] for x in range(0, len(ph_genes_arr), 4)
            ]
            result_array.extend([
                'Top hits of diseases potentially impacted by %s. Full dataset saved as drug_indications_{drug_disease}.'
                % drug,
                'We queried the gene neighborhood of drug targets and found the following phenotypes to be significant. Here we list significant phenotypes in order of probability. Column headings are phenotype, probability, significance level cutoff, and a list of genes that support the relationship',
            ])
            ph_genes_array_all_iris = iris_objects.IrisDataframe(
                column_names=[
                    "Phenotype", "probability",
                    "Benjamin Hochberg significance cutoff", "list of genes"
                ],
                column_types=["Text", "Text", "Text", "Text"],
                data=ph_genes_array_all)
            self.iris.add_to_env('drug_indications' + query_name,
                                 ph_genes_array_all_iris)

            # Only the first five rows are displayed inline.
            ph_genes_array_short_iris = iris_objects.IrisDataframe(
                column_names=[
                    "Phenotype", "Probability",
                    "Benjamin Hochberg significance cutoff", "list of genes"
                ],
                column_types=["Text", "Text", "Text", "Text"],
                data=ph_genes_array_all[:5])
            result_array.append(ph_genes_array_short_iris)
        else:
            result_array.append('No other drug indications found')

    # Diagram, if one was generated
    if "image_file" in result:
        result_array.append('Diagram stored in: %s' % result["image_file"])
        os.system("open " + result["image_file"])

    result_array.append(
        "Full dataframes are available for viewing using the command: print {dataframe_name}. See right side panel for more information."
    )
    result_array.append(
        "The suffix for the drug-disease interaction pair is: %s" % query_name)
    result_array.append("Results are also stored in: %s" % results_dir)
    return result_array
def explanation(self, results):
    """Summarise a batch of drug-disease queries and register their tables.

    Args:
        results: Either a string, which is returned unchanged, or a
            sequence whose first element is the task directory and whose
            remaining elements are per-pair result dicts. Every dict
            carries ``drug`` and ``disease``. A dict with ``error`` is
            reported as failed. Otherwise ``drug_genes`` and
            ``disease_genes`` are required, and ``GOENRICH``,
            ``tissue_df_dis``, ``pubmed`` and ``other_disease`` are used
            when present.

    Returns:
        The input string, or a list of three message strings followed by
        an ``IrisDataframe`` with one row per pair (drug, disease, status,
        suffix, registered variable names).
    """
    if isinstance(results, str):
        return results

    # Read by index and slice instead of ``pop(0)``: the caller's list is
    # left intact, so explaining the same results again does not drop the
    # first pair.
    task_dir = results[0]
    explanation_array = [
        'Result tables for each query are stored in the right side bar as variables. You can view a table using the command: print {dataframe_name}_{suffix}.',
        'Diagrams (if requested) and other results can be found in the results directory: %s'
        % task_dir,
        'Suffix and variable information is displayed below',
    ]

    info_data = []
    for result in results[1:]:
        print('results', result['drug'], result['disease'])

        if 'error' in result:
            info_data.append(
                [result['drug'], result['disease'], result['error'], '', ''])
            continue

        # Suffix: first three characters of drug and disease joined by an
        # underscore, with spaces removed and lower-cased.
        query_name = result['drug'][:3] + "_" + result['disease'][:3]
        query_name = "_" + query_name.replace(' ', '').lower()

        # Genes associated with the drug
        self.iris.add_to_env(
            'drug_genes' + query_name,
            iris_objects.IrisDataframe(data=result['drug_genes']))
        # Genes associated with the disease
        self.iris.add_to_env(
            'disease_genes' + query_name,
            iris_objects.IrisDataframe(data=result['disease_genes']))
        variable_info = [
            'drug_genes' + query_name, 'disease_genes' + query_name
        ]

        # Significant GO terms are best effort, as in the single-pair
        # explanation: a pair without usable GO data must not abort the
        # summary of the whole batch.
        try:
            go_term_object = iris_objects.IrisDataframe(
                data=result['GOENRICH'])
        except Exception:
            go_term_object = None
        if go_term_object is not None:
            self.iris.add_to_env('go_terms' + query_name, go_term_object)
            variable_info.append('go_terms' + query_name)

        # Tissues in which disease genes are differentially expressed
        if 'tissue_df_dis' in result:
            variable_info.append('tissues_disease' + query_name)
            self.iris.add_to_env(
                'tissues_disease' + query_name,
                iris_objects.IrisDataframe(data=result['tissue_df_dis']))

        # Supporting PubMed IDs; a string value carries no table.
        if "pubmed" in result and not isinstance(result["pubmed"], str):
            variable_info.append('pmid' + query_name)
            self.iris.add_to_env(
                'pmid' + query_name,
                iris_objects.IrisDataframe(data=result["pubmed"]))

        # Other diseases possibly affected by the drug
        if "other_disease" in result:
            ph_genes_str, _ = result["other_disease"]
            ph_genes_arr = ph_genes_str.split('\t')
            if len(ph_genes_arr) >= 4:
                # The flat tab-separated list is regrouped into rows of four.
                ph_genes_array_all = [
                    ph_genes_arr[x:x + 4]
                    for x in range(0, len(ph_genes_arr), 4)
                ]
                ph_genes_array_all_iris = iris_objects.IrisDataframe(
                    column_names=[
                        "Phenotype", "probability",
                        "Benjamin Hochberg significance cutoff",
                        "list of genes"
                    ],
                    column_types=["Text", "Text", "Text", "Text"],
                    data=ph_genes_array_all)
                self.iris.add_to_env('drug_indications' + query_name,
                                     ph_genes_array_all_iris)
                variable_info.append('drug_indications' + query_name)

        info_data.append([
            result['drug'], result['disease'], 'SUCCESS', query_name,
            ', '.join(variable_info)
        ])

    # Save the overview as an iris dataframe
    info_df = iris_objects.IrisDataframe(
        column_names=[
            "Drug", "Disease", "Query Status", "Suffix",
            "Associated Variables"
        ],
        column_types=["Text", "Text", "Text", "Text", "Text"],
        data=info_data)
    explanation_array.append(info_df)
    return explanation_array
def command(self, array):
    """Compute the skewness of ``array`` along axis 0.

    Args:
        array: Object exposing ``column_names``, ``column_types`` and
            ``to_matrix()``.

    Returns:
        A one-row ``IrisDataframe`` that reuses the column names and types
        of ``array``; its row is ``scipy.stats.skew`` of the matrix with
        ``nan_policy='omit'``.
    """
    from scipy.stats import skew

    column_skew = skew(array.to_matrix(), axis=0, nan_policy='omit')
    return iris_objects.IrisDataframe(
        column_names=array.column_names,
        column_types=array.column_types,
        data=[column_skew])
def explanation(self, results):
    """Present enrichment results as a heading plus a table.

    Args:
        results: Data passed to ``IrisDataframe`` as ``data``.

    Returns:
        A two-item list: the heading string and the ``IrisDataframe``.
    """
    table = iris_objects.IrisDataframe(data=results)
    heading = "These are the enriched molecular function GO terms with corrected p-values"
    return [heading, table]
def command(self, array):
    """Compute the minimum of ``array`` along axis 0.

    Args:
        array: Object exposing ``column_names``, ``column_types`` and
            ``to_matrix()``; the matrix must provide ``min(axis=...)``.

    Returns:
        A one-row ``IrisDataframe`` that reuses the column names and types
        of ``array``; its row is ``array.to_matrix().min(axis=0)``.
    """
    # No numpy import is needed: ``min`` is called as a method of the
    # matrix returned by ``to_matrix()``.
    column_minima = array.to_matrix().min(axis=0)
    return iris_objects.IrisDataframe(
        column_names=array.column_names,
        column_types=array.column_types,
        data=[column_minima])