class FilterByInteraction(GenericBlock):
    """Filter a pair of expression sets by a binary interaction matrix."""
    block_base_name = "FILTER_BY_BI"
    name = "Filter ES by interaction"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _mRNA_es = InputBlockField(name="mRNA_es", order_num=10,
                               required_data_type="ExpressionSet", required=True)
    _miRNA_es = InputBlockField(name="miRNA_es", order_num=20,
                                required_data_type="ExpressionSet", required=True)
    _interaction = InputBlockField(name="interaction", order_num=30,
                                   required_data_type="BinaryInteraction",
                                   required=True)

    m_rna_filtered_es = OutputBlockField(name="m_rna_filtered_es",
                                         provided_data_type="ExpressionSet")
    mi_rna_filtered_es = OutputBlockField(name="mi_rna_filtered_es",
                                          provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(FilterByInteraction, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the asynchronous filtering task on the input sets."""
        self.clean_errors()
        m_rna = self.get_input_var("mRNA_es")
        mi_rna = self.get_input_var("miRNA_es")
        bi_matrix = self.get_input_var("interaction")
        self.celery_task = wrapper_task.s(
            filter_by_bi, exp, self,
            m_rna_es=m_rna,
            mi_rna_es=mi_rna,
            interaction_matrix=bi_matrix,
            base_filename="%s_filtered_by_BI" % self.uuid,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, m_rna_filtered_es, mi_rna_filtered_es):
        """Publish both filtered expression sets as block outputs."""
        self.set_out_var("m_rna_filtered_es", m_rna_filtered_es)
        self.set_out_var("mi_rna_filtered_es", mi_rna_filtered_es)
        exp.store_block(self)
class UploadGeneSets(GenericBlock):
    """Input block: parse an uploaded .gmt file into a GeneSets output."""
    block_base_name = "GENE_SETS_UPLOAD"
    block_group = GroupType.INPUT_DATA
    name = "Upload Gene Sets"

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "done"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    upload_gs = ParamField("upload_gs", title="Gene sets in .gmt format",
                           order_num=10, input_type=InputType.FILE_INPUT,
                           field_type=FieldType.CUSTOM)

    _gene_sets = OutputBlockField(name="gene_sets", provided_data_type="GeneSets")

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Read the tab-separated .gmt upload and store it as `gene_sets`."""
        try:
            source_file = self.upload_gs.get_file()
            parsed = GmtStorage.read_inp(source_file, "\t")
            gene_sets = GeneSets(exp.get_data_folder(), str(self.uuid))
            gene_sets.store_gs(parsed)
            self.set_out_var("gene_sets", gene_sets)
        except Exception as e:
            # Best-effort parse: record the failure but keep the block alive.
            exp.log(self.uuid, e, severity="CRITICAL")
            log.error(e)
        exp.store_block(self)
class EnrichmentNoTBlock(GenericBlock):
    """Comodule enrichment of gene sets against patterns at threshold T."""
    block_base_name = "ENRICHMENT_COM"
    name = "Comodule Enrichment"
    is_abstract = False
    block_group = GroupType.TESTING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # NOTE: attribute names renamed from `_cs_1`, `H` and `dict` (the last
    # shadowed the builtin `dict`); the framework-visible `name=` values
    # are unchanged, so ports/outputs keep their identifiers.
    _gs = InputBlockField(name="gs", order_num=10,
                          required_data_type="GeneSets", required=True)
    _patterns = InputBlockField(name="patterns", order_num=11,
                                required_data_type="GeneSets", required=True)
    _t = ParamField(name="T", order_num=12, title="Enrichment threshold",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                    init_val="0.05")

    _dictionary_set = OutputBlockField(name="dictionary_set",
                                       provided_data_type="DictionarySet")

    def __init__(self, *args, **kwargs):
        super(EnrichmentNoTBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the asynchronous enrichment task."""
        self.clean_errors()
        gs = self.get_input_var("gs")
        cs = self.get_input_var("patterns")
        self.celery_task = wrapper_task.s(
            enrichment_no_t_task, exp, self,
            T=self.T,
            gs=gs,
            patterns=cs,
            base_filename="%s_%s_enrich" % (self.uuid, 'enrichment_cont'))
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Store the enrichment result as the `dictionary_set` output."""
        self.set_out_var("dictionary_set", flt_es)
        exp.store_block(self)
class MergeGeneSetWithPlatformAnnotation(GenericBlock):
    """Map gene sets onto the probes of a platform annotation."""
    block_base_name = "MERGE_GS_GPL_ANN"
    name = "Merge gene set with platform"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_gs = InputBlockField(name="gs", order_num=10,
                                required_data_type="GeneSets", required=True)
    _input_ann = InputBlockField(name="ann", order_num=20,
                                 required_data_type="PlatformAnnotation",
                                 required=True)

    _gs = OutputBlockField(name="gs", field_type=FieldType.HIDDEN,
                           init_val=None, provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        super(MergeGeneSetWithPlatformAnnotation, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the probe-mapping task for the input gene sets."""
        self.clean_errors()
        src_gs = self.get_input_var("gs")
        annotation = self.get_input_var("ann")
        self.celery_task = wrapper_task.s(
            map_gene_sets_to_probes, exp, self,
            base_dir=exp.get_data_folder(),
            base_filename="%s_merged" % self.uuid,
            ann_gene_sets=annotation.gene_sets,
            src_gene_sets=src_gs,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        self.set_out_var("gs", gs)
        exp.store_block(self)
class SvdSubAgg(GenericBlock):
    """Abstract SVD-based aggregation of mRNA/miRNA sets over an interaction
    matrix; concrete subclasses pick the aggregation `mode`."""
    is_abstract = True
    block_group = GroupType.AGGREGATION

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _mRNA_es = InputBlockField(name="mRNA_es", order_num=10,
                               required_data_type="ExpressionSet", required=True)
    _miRNA_es = InputBlockField(name="miRNA_es", order_num=20,
                                required_data_type="ExpressionSet", required=True)
    _interaction = InputBlockField(name="interaction", order_num=30,
                                   required_data_type="BinaryInteraction",
                                   required=True)

    c = ParamField(name="c", title="Constant c", input_type=InputType.TEXT,
                   field_type=FieldType.FLOAT, init_val=1.0)

    agg_es = OutputBlockField(name="agg_es", provided_data_type="ExpressionSet")

    # Aggregation mode; overridden by concrete subclasses.
    mode = ""

    def __init__(self, *args, **kwargs):
        super(SvdSubAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the aggregation task using this block's mode and c."""
        self.clean_errors()
        m_rna = self.get_input_var("mRNA_es")
        mi_rna = self.get_input_var("miRNA_es")
        bi_matrix = self.get_input_var("interaction")
        self.celery_task = wrapper_task.s(
            aggregation_task, exp, self,
            mode=self.mode,
            c=self.c,
            m_rna_es=m_rna,
            mi_rna_es=mi_rna,
            interaction_matrix=bi_matrix,
            base_filename="%s_%s_agg" % (self.uuid, self.mode)
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
class GlobalTest(GenericBlock):
    """Apply Goeman's global test to an expression set over gene sets."""
    block_base_name = "GLOBAL_TEST"
    name = "Goeman global test"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_es = InputBlockField(name="es", order_num=10,
                                required_data_type="ExpressionSet", required=True)
    _input_gs = InputBlockField(name="gs", order_num=20,
                                required_data_type="GeneSets", required=True)

    _result = OutputBlockField(name="result", field_type=FieldType.STR,
                               provided_data_type="TableResult", init_val=None)

    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST,
                          init_val=["gt_result.html"])

    def __init__(self, *args, **kwargs):
        super(GlobalTest, self).__init__(*args, **kwargs)
        self.celery_task = None
        # Pre-create the result table inside the experiment's data folder
        # so the task has a destination to write into.
        exp = Experiment.get_exp_by_id(self.exp_id)
        self.result = TableResult(
            base_dir=exp.get_data_folder(),
            base_filename="%s_gt_result" % self.uuid,
        )
        self.result.headers = ['p-value', 'Statistic', 'Expected',
                               'Std.dev', '#Cov']

    def execute(self, exp, *args, **kwargs):
        """Schedule the global test task."""
        self.clean_errors()
        self.celery_task = wrapper_task.s(
            global_test_task, exp, self,
            es=self.get_input_var("es"),
            gene_sets=self.get_input_var("gs"),
            table_result=self.result
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Keep the produced table and expose it via the `result` output."""
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
class MergeExpressionSets(GenericBlock):
    """Concatenate two expression sets into a single merged set."""
    block_base_name = "MergeES"
    name = "Merge ES by concatenation"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es_1 = InputBlockField(name="es_1", title="Set 1", order_num=10,
                            required_data_type="ExpressionSet", required=True)
    _es_2 = InputBlockField(name="es_2", title="Set 2", order_num=20,
                            required_data_type="ExpressionSet", required=True)

    merged_es = OutputBlockField(name="merged_es",
                                 provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(MergeExpressionSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the merge of both input sets."""
        self.clean_errors()
        first = self.get_input_var("es_1")
        second = self.get_input_var("es_2")
        self.celery_task = wrapper_task.s(
            merge_two_es, exp, self,
            es_1=first,
            es_2=second,
            base_filename="%s_merged" % self.uuid,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        self.set_out_var("merged_es", es)
        exp.store_block(self)
class ThresholdBlock(GenericBlock):
    """Threshold an expression set, producing gene sets."""
    block_base_name = "THRESHOLD"
    name = "Threshold"
    is_abstract = False
    block_group = GroupType.SNMNMF

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es", order_num=10,
                          required_data_type="ExpressionSet", required=True)
    t = ParamField(name="T", title="Threshold", input_type=InputType.TEXT,
                   field_type=FieldType.FLOAT, init_val=0.1)

    flt_es = OutputBlockField(name="gene_sets", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        super(ThresholdBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the asynchronous thresholding task."""
        self.clean_errors()
        source_es = self.get_input_var("es")
        out_name = "%s_%s_thr" % (self.uuid, 'threshold')
        self.celery_task = wrapper_task.s(threshold_task, exp, self,
                                          es=source_es,
                                          T=self.T,
                                          base_filename=out_name)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        self.set_out_var("gene_sets", flt_es)
        exp.store_block(self)
class UploadInteraction(GenericBlock):
    """Input block: turn an uploaded matrix into a BinaryInteraction output."""
    block_base_name = "GENE_INTERACTION"
    block_group = GroupType.INPUT_DATA
    name = "Upload gene interaction"

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "done"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    upload_interaction = ParamField("upload_interaction",
                                    title="Interaction matrix", order_num=10,
                                    input_type=InputType.FILE_INPUT,
                                    field_type=FieldType.CUSTOM)
    row_units = ParamField("row_units", title="Row units", order_num=11,
                           input_type=InputType.TEXT, field_type=FieldType.STR,
                           required=False)
    col_units = ParamField("col_units", title="Column units", order_num=12,
                           input_type=InputType.TEXT, field_type=FieldType.STR,
                           required=False)

    _interaction = OutputBlockField(name="interaction",
                                    provided_data_type="BinaryInteraction")

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Convert the uploaded data frame into a stored BinaryInteraction."""
        frame = self.upload_interaction.get_as_data_frame()
        result = BinaryInteraction(exp.get_data_folder(), str(self.uuid))
        result.store_matrix(frame)
        # Carry the user-declared axis units along with the matrix.
        result.row_units = self.row_units
        result.col_units = self.col_units
        self.set_out_var("interaction", result)
        exp.store_block(self)
class ZScoreBlock(GenericBlock):
    """Z-score normalization of an expression set."""
    block_base_name = "ZSCORE_NORM"
    name = "Z-score Normalization"
    is_abstract = False
    block_group = GroupType.NORMALIZATION

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es", order_num=10,
                          required_data_type="ExpressionSet", required=True)

    flt_es = OutputBlockField(name="flt_zscore_es",
                              provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(ZScoreBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the z-score normalization task."""
        self.clean_errors()
        source_es = self.get_input_var("es")
        out_name = "%s_%s_flt" % (self.uuid, 'zscore')
        self.celery_task = wrapper_task.s(zscore_task, exp, self,
                                          es=source_es,
                                          base_filename=out_name)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        self.set_out_var("flt_zscore_es", flt_es)
        exp.store_block(self)
class GetBroadInstituteGeneSet(GenericBlock):
    """Input block: pick a pre-loaded MSigDB gene set by database id."""
    block_base_name = "BI_GENE_SET"
    block_group = GroupType.INPUT_DATA
    name = "Get MSigDB Gene Set"

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "done",
                     reload_block_in_client=True),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    # TODO: maybe create more general solution ?
    _all_gene_sets = BlockField("all_gene_sets", title="",
                                input_type=InputType.HIDDEN,
                                field_type=FieldType.RAW, is_a_property=True)

    msigdb_id = ParamField(
        name="msigdb_id",
        title="MSigDB gene set",
        input_type=InputType.SELECT,
        field_type=FieldType.INT,
        init_val=0,  # TODO: fix hardcoded value
        select_provider="all_gene_sets")

    _gs = OutputBlockField(name="gs", field_type=FieldType.HIDDEN,
                           provided_data_type="GeneSets")

    @property
    def all_gene_sets(self):
        """Options for the select widget: metadata of every stored set."""
        return BroadInstituteGeneSet.get_all_meta()

    def on_params_is_valid(self, exp):
        """Resolve the chosen id to its gene sets and expose them as `gs`."""
        record = BroadInstituteGeneSet.objects.get(pk=self.msigdb_id)
        self.set_out_var("gs", record.get_gene_sets())
        super(GetBroadInstituteGeneSet, self).on_params_is_valid(exp)
class PatternSearch(GenericBlock):
    """Search mRNA/miRNA expression data for comodule patterns."""
    block_base_name = "PattSearch"
    name = "Pattern Search"
    block_group = GroupType.PATTERN_SEARCH

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _m_rna = InputBlockField(name="mRNA", order_num=10,
                             required_data_type="ExpressionSet", required=True)
    _mi_rna = InputBlockField(name="miRNA", order_num=20,
                              required_data_type="ExpressionSet", required=False)
    gene2gene = InputBlockField(name="gene2gene", order_num=30,
                                required_data_type="BinaryInteraction",
                                required=True)
    miRNA2gene = InputBlockField(name="miRNA2gene", order_num=31,
                                 required_data_type="BinaryInteraction",
                                 required=False)

    genes_num = ParamField(name="genes_num", title="Number of Genes",
                           order_num=10, input_type=InputType.TEXT,
                           field_type=FieldType.INT, init_val=100)
    d = ParamField(name="d", order_num=70, title="d",
                   input_type=InputType.TEXT, field_type=FieldType.INT,
                   init_val=2)
    min_imp = ParamField(name="min_imp", order_num=80,
                         title="Minimal improvement",
                         input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                         init_val=0.06)
    _metric = ParamField(
        "metric", title="Metric", order_num=40,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="mutual_information",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mutual_information", "Mutual Information"],
                ['normed_mutual_information', "Normed Mutual Information"],
                ['square_error', "Square Error"],
                ['correlation', "Correlation"],
                ['t-test', "TTest"],
                ['wilcoxon', "Wilcoxon"],
            ]
        })

    patterns = OutputBlockField(name="patterns", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        super(PatternSearch, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the pattern search task with the configured parameters."""
        self.clean_errors()
        exp.log(self.uuid, "Execute called")
        self.celery_task = wrapper_task.s(
            pattern_search, exp, self,
            m_rna_es=self.get_input_var("mRNA"),
            mi_rna_es=self.get_input_var("miRNA"),
            gene2gene=self.get_input_var("gene2gene"),
            miRNA2gene=self.get_input_var("miRNA2gene"),
            radius=self.d,
            min_imp=self.min_imp,
            number_of_genes=self.genes_num,
            # BUGFIX: "metric" is a ParamField, not an input port, so
            # get_input_var("metric") could not resolve it; read the
            # parameter value directly (same as PatternFilter does).
            metric=self.metric,
            base_filename="%s_comodule_sets" % self.uuid,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Store the discovered patterns as the block output."""
        exp.log(self.uuid, "Success")
        self.set_out_var("patterns", gs)
        exp.store_block(self)
class UserUploadComplex(GenericBlock):
    """Input block: upload mRNA / miRNA / methylation matrices plus an
    optional phenotype matrix, then process them into expression sets."""
    block_base_name = "UPLOAD_CMPLX"
    block_group = GroupType.INPUT_DATA
    name = "Upload mRna/miRna/methyl"

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data"),
        ActionRecord("success", ["processing_upload"], "done",
                     reload_block_in_client=True),
        ActionRecord("error", ["processing_upload"], "valid_params"),
    ])

    # --- mRNA upload parameters -------------------------------------------
    m_rna_matrix = ParamField("m_rna_matrix", title="mRNA expression",
                              order_num=10, input_type=InputType.FILE_INPUT,
                              field_type=FieldType.CUSTOM)
    m_rna_platform = ParamField("m_rna_platform", title="Platform ID",
                                order_num=11, input_type=InputType.TEXT,
                                field_type=FieldType.STR, required=False)
    m_rna_unit = ParamField(
        "m_rna_unit", title="Working unit [used when platform is unknown]",
        order_num=12, input_type=InputType.SELECT, field_type=FieldType.STR,
        required=False, init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [["RefSeq", "RefSeq"],
                               ["Entrez", "EntrezID"],
                               ["Symbol", "Symbol"]]
        })
    m_rna_matrix_ori = ParamField(
        "m_rna_matrix_ori", title="Matrix orientation", order_num=13,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [["SxG", "Samples x Genes"],
                               ["GxS", "Genes x Samples"]]
        })
    csv_sep_m_rna = ParamField(
        "csv_sep_m_rna", title="CSV separator symbol", order_num=14,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    # --- miRNA upload parameters ------------------------------------------
    mi_rna_matrix = ParamField("mi_rna_matrix", title=u"μRNA expression",
                               order_num=20, input_type=InputType.FILE_INPUT,
                               field_type=FieldType.CUSTOM, required=False)
    mi_rna_platform = ParamField("mi_rna_platform", title="Platform ID",
                                 order_num=21, input_type=InputType.TEXT,
                                 field_type=FieldType.STR, required=False)
    mi_rna_unit = ParamField(
        "mi_rna_unit", title="Working unit [used when platform is unknown]",
        order_num=22, input_type=InputType.SELECT, field_type=FieldType.STR,
        required=False, init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [["RefSeq", "RefSeq"],
                               ["mirbase", "miRBase ID"]]
        })
    mi_rna_matrix_ori = ParamField(
        "mi_rna_matrix_ori", title="Matrix orientation", order_num=23,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [["SxG", "Samples x Genes"],
                               ["GxS", "Genes x Samples"]]
        })
    csv_sep_mi_rna = ParamField(
        "csv_sep_mi_rna", title="CSV separator symbol", order_num=24,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    # --- methylation upload parameters ------------------------------------
    methyl_matrix = ParamField("methyl_matrix", title="Methylation expression",
                               order_num=30, input_type=InputType.FILE_INPUT,
                               field_type=FieldType.CUSTOM, required=False)
    methyl_platform = ParamField("methyl_platform", title="Platform ID",
                                 order_num=31, input_type=InputType.TEXT,
                                 field_type=FieldType.STR, required=False)
    methyl_matrix_ori = ParamField(
        "methyl_matrix_ori", title="Matrix orientation", order_num=33,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [["SxG", "Samples x Genes"],
                               ["GxS", "Genes x Samples"]]
        })
    csv_sep_methyl = ParamField(
        "csv_sep_methyl", title="CSV separator symbol", order_num=34,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    # --- phenotype upload parameters --------------------------------------
    pheno_matrix = ParamField("pheno_matrix", title="Phenotype matrix",
                              order_num=40, input_type=InputType.FILE_INPUT,
                              field_type=FieldType.CUSTOM, required=False)
    csv_sep_pheno = ParamField(
        "csv_sep_pheno", title="CSV separator symbol", order_num=50,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    _is_sub_pages_visible = BlockField("is_sub_pages_visible",
                                       FieldType.RAW, is_a_property=True)

    _m_rna_es = OutputBlockField(name="m_rna_es", field_type=FieldType.HIDDEN,
                                 provided_data_type="ExpressionSet")
    _mi_rna_es = OutputBlockField(name="mi_rna_es", field_type=FieldType.HIDDEN,
                                  provided_data_type="ExpressionSet")
    _methyl_es = OutputBlockField(name="methyl_es", field_type=FieldType.HIDDEN,
                                  provided_data_type="ExpressionSet")

    mrna_gpl_file = BlockField("mrna_gpl_file", FieldType.CUSTOM, None)
    mirna_gpl_file = BlockField("mirna_gpl_file", FieldType.CUSTOM, None)
    methyl_gpl_file = BlockField("methyl_gpl_file", FieldType.CUSTOM, None)

    pages = BlockField("pages", FieldType.RAW, init_val={
        "assign_phenotype_classes": {
            "title": "Assign phenotype classes",
            "resource": "assign_phenotype_classes",
            "widget": "widgets/assign_phenotype_classes.html"
        },
    })

    @property
    def is_sub_pages_visible(self):
        """Sub-pages are only shown once the upload has been processed."""
        return self.state in [
            'source_was_preprocessed', 'sample_classes_assigned',
            'ready', 'done'
        ]

    def __init__(self, *args, **kwargs):
        super(UserUploadComplex, self).__init__(*args, **kwargs)
        self.celery_task = None

    def process_upload(self, exp, *args, **kwargs):
        """Schedule the asynchronous processing of the uploaded matrices."""
        self.clean_errors()
        self.celery_task = wrapper_task.s(user_upload_complex_task, exp, self)
        exp.store_block(self)
        self.celery_task.apply_async()

    def _get_stored_es(self):
        """Return the first stored expression set (mRNA, miRNA, methyl).

        Raises Exception when no dataset has been processed yet.  Shared by
        `phenotype_for_js` and `update_user_classes_assignment`, which
        previously duplicated this selection cascade.
        """
        for var_name in ("m_rna_es", "mi_rna_es", "methyl_es"):
            es = self.get_out_var(var_name)
            if es is not None:
                return es
        raise Exception("No data was stored before")

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Expose the phenotype of any stored expression set to the client."""
        return prepare_phenotype_for_js_from_es(self._get_stored_es())

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Apply user-assigned phenotype classes to every stored dataset."""
        pheno_df = self._get_stored_es().get_pheno_data_frame()
        received = json.loads(request.body)
        pheno_df[received["user_class_title"]] = received["classes"]

        # Propagate the updated phenotype frame to every dataset we hold.
        for work_es in [self.get_out_var("m_rna_es"),
                        self.get_out_var("mi_rna_es"),
                        self.get_out_var("methyl_es")]:
            if work_es is not None:
                work_es.pheno_metadata["user_class_title"] = received[
                    "user_class_title"]
                work_es.store_pheno_data_frame(pheno_df)

        exp.store_block(self)

    def success(self, exp, m_rna_es, mi_rna_es, methyl_es):
        """Store whichever expression sets were produced by the task."""
        if m_rna_es:
            self.set_out_var("m_rna_es", m_rna_es)
        if mi_rna_es:
            self.set_out_var("mi_rna_es", mi_rna_es)
        if methyl_es:
            self.set_out_var("methyl_es", methyl_es)
        exp.store_block(self)
class PatternFilter(GenericBlock):
    """Keep the n best patterns from a gene-set collection by a metric."""
    block_base_name = "PattFilter"
    name = "Pattern Filter"
    block_group = GroupType.FILTER

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _m_rna = InputBlockField(name="mRNA", order_num=10,
                             required_data_type="ExpressionSet", required=True)
    _mi_rna = InputBlockField(name="miRNA", order_num=20,
                              required_data_type="ExpressionSet", required=False)
    _gs = InputBlockField(name="gs", order_num=30,
                          required_data_type="GeneSets", required=True)

    metric = ParamField(
        "metric", title="Metric", order_num=40,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="mutual_information",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mutual_information", "Mutual Information"],
                ['normed_mutual_information', "Normed Mutual Information"],
                ['square_error', "Square Error"],
                ['correlation', "Correlation"],
                ['t-test', "TTest"],
                ['wilcoxon', "Wilcoxon"],
            ]
        })
    n_best = ParamField(name="n_best", order_num=50, title="# of best",
                        input_type=InputType.TEXT, field_type=FieldType.INT,
                        init_val=10)

    patterns = OutputBlockField(name="patterns", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        super(PatternFilter, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the filtering task on the input patterns."""
        self.clean_errors()
        self.celery_task = wrapper_task.s(
            pattern_filter_task, exp, self,
            m_rna_es=self.get_input_var("mRNA"),
            mi_rna_es=self.get_input_var("miRNA"),
            gene_sets=self.get_input_var("gs"),
            metric=self.metric,
            n_best=self.n_best,
            base_filename="%s_comodule_sets" % self.uuid)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        self.set_out_var("patterns", gs)
        exp.store_block(self)
class GenericClassifier(GenericBlock):
    """Abstract base for classifier blocks.

    Subclasses set `classifier_name` and implement `collect_options()` to
    populate `self.classifier_options` / `self.fit_options` before the
    classification task is scheduled.
    """
    block_group = GroupType.CLASSIFIER
    is_abstract = True
    is_block_supports_auto_execution = True

    classifier_name = ""

    # Block behavior
    _block_actions = ActionsList([])
    _block_actions.extend(save_params_actions_list)
    _block_actions.extend(execute_block_actions_list)

    # User defined parameters

    # Input ports definition
    _train_es = InputBlockField(name="train_es", order_num=10,
                                required_data_type="ExpressionSet",
                                required=True)
    _test_es = InputBlockField(name="test_es", order_num=20,
                               required_data_type="ExpressionSet",
                               required=True)

    # Provided outputs
    _result = OutputBlockField(name="result", field_type=FieldType.CUSTOM,
                               provided_data_type="ClassifierResult",
                               init_val=None)

    def __init__(self, *args, **kwargs):
        super(GenericClassifier, self).__init__(*args, **kwargs)
        self.celery_task = None
        self.classifier_options = {}
        self.fit_options = {}

    @abstractmethod
    def collect_options(self):
        """
        Should populate `self.classifier_options` and `self.fit_options`
        from block parameters.
        """
        pass

    def get_option_safe(self, name, target_type=None):
        """Return attribute `name`, optionally coerced to `target_type`.

        Returns None when the attribute is missing, falsy, or fails to
        coerce.  NOTE(review): falsy values (0, 0.0, "") are treated as
        "not set" — an explicit zero cannot be passed through here.
        """
        if hasattr(self, name):
            raw = getattr(self, name)
            if raw:
                if target_type:
                    try:
                        return target_type(raw)
                    # BUGFIX: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit; only ignore real
                    # coercion failures.
                    except Exception:
                        pass
                else:
                    return raw
        return None

    def collect_option_safe(self, name, target_type=None, target_name=None):
        """Copy option `name` into `classifier_options` (under
        `target_name` when given) if it has a truthy value; return it."""
        value = self.get_option_safe(name, target_type)
        if value:
            if target_name:
                self.classifier_options[target_name] = value
            else:
                self.classifier_options[name] = value
        return value

    def execute(self, exp, *args, **kwargs):
        """Schedule the classification task on the train/test sets."""
        self.set_out_var("result", None)
        self.collect_options()

        train_es = self.get_input_var("train_es")
        test_es = self.get_input_var("test_es")

        self.celery_task = wrapper_task.s(
            apply_classifier,
            exp=exp, block=self,
            train_es=train_es, test_es=test_es,
            classifier_name=self.classifier_name,
            classifier_options=self.classifier_options,
            fit_options=self.fit_options,
            base_folder=exp.get_data_folder(),
            base_filename="%s_%s" % (self.uuid, self.classifier_name),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        # We store obtained result as an output variable
        self.set_out_var("result", result)
        exp.store_block(self)

    def reset_execution(self, exp, *args, **kwargs):
        """Clear errors and the previous result before a re-run."""
        self.clean_errors()
        self.set_out_var("result", None)
        exp.store_block(self)
class GeneSetAgg(GenericBlock):
    """Aggregate an expression set over gene sets (mean or median)."""
    block_base_name = "GENE_SET_AGG"
    name = "Gene sets aggregation"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es", order_num=10,
                          required_data_type="ExpressionSet", required=True)
    _gs = InputBlockField(name="gs", order_num=20,
                          required_data_type="GeneSets", required=True)

    agg_method = ParamField(
        "agg_method", title="Aggregate method", order_num=50,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="mean",
        options={
            "inline_select_provider": True,
            # BUGFIX: option value was "media" — a typo; the method value
            # matching the "Median" label is "median" (cf. GeneSetAggCV).
            "select_options": [["mean", "Mean"], ["median", "Median"]]
        })

    agg_es = OutputBlockField(name="agg_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(GeneSetAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the aggregation task with the selected method."""
        self.clean_errors()
        es = self.get_input_var("es")
        gs = self.get_input_var("gs")
        base_filename = "%s_gs_agg" % (self.uuid, )
        self.celery_task = wrapper_task.s(do_gs_agg, exp, self,
                                          es, gs, self.agg_method,
                                          base_filename)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
class GeneSetAggCV(GenericBlock):
    """Cross-validation gene-set aggregation.

    Aggregates both the train and the test ExpressionSet with the same gene
    sets and aggregation method (mean / median / PCA) in a single celery task.
    """
    block_group = GroupType.AGGREGATION
    block_base_name = "CV_GS_A"
    name = "CV Gene Sets Aggregation"

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_train_es = InputBlockField(name="train_es", order_num=10,
                                      required_data_type="ExpressionSet",
                                      required=True)
    _input_test_es = InputBlockField(name="test_es", order_num=20,
                                     required_data_type="ExpressionSet",
                                     required=True)
    _input_gs = InputBlockField(name="gs", order_num=30,
                                required_data_type="GeneSets", required=True)

    agg_method = ParamField(
        "agg_method", title="Aggregate method", order_num=50,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="mean",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mean", "Mean"],
                ["median", "Median"],
                ["pca", "PCA"]
            ]
        }
    )

    out_train_es = OutputBlockField(name="out_train_es",
                                    provided_data_type="ExpressionSet")
    out_test_es = OutputBlockField(name="out_test_es",
                                   provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(GeneSetAggCV, self).__init__(*args, **kwargs)
        # Pending async task handle; populated by execute().
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Queue the CV aggregation task for both expression sets."""
        self.clean_errors()
        task_signature = wrapper_task.s(
            agg_task_cv,
            exp, self,
            train_es=self.get_input_var("train_es"),
            test_es=self.get_input_var("test_es"),
            gene_sets=self.get_input_var("gs"),
            method=self.agg_method,
            base_filename="%s_%s_agg" % (self.uuid, "pca_cv")
        )
        self.celery_task = task_signature
        # Store first so the block state is durable before dispatch.
        exp.store_block(self)
        task_signature.apply_async()

    def success(self, exp, out_train_es, out_test_es):
        # Publish both aggregated sets as block outputs.
        self.set_out_var("out_train_es", out_train_es)
        self.set_out_var("out_test_es", out_test_es)
        exp.store_block(self)
class FeatureSelectionByCut(GenericBlock):
    """Select features of an ExpressionSet by cutting a ranking table.

    Rows of the rank table whose `cut_property` value compares against
    `threshold` in the chosen `cut_direction` are kept.
    """
    block_base_name = "FS_BY_CUT"
    block_group = GroupType.FILTER
    name = "Feature Selection by Ranking"

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es", order_num=10,
                          required_data_type="ExpressionSet", required=True)
    _rank_table = InputBlockField(name="rank_table", order_num=20,
                                  required_data_type="TableResult", required=True)

    _cut_property_options = BlockField(name="cut_property_options",
                                       field_type=FieldType.RAW,
                                       is_a_property=True)
    cut_property = ParamField(
        name="cut_property",
        title="Ranking property to use",
        # NOTE: select widget wired off for now; plain text entry instead.
        # input_type=InputType.SELECT,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        #select_provider="cut_property_options",
        order_num=10,
    )
    threshold = ParamField(
        name="threshold",
        title="Threshold for cut",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
    )

    _cut_direction_options = BlockField(name="cut_direction_options",
                                        field_type=FieldType.RAW)
    cut_direction_options = ["<", "<=", ">=", ">"]
    cut_direction = ParamField(
        name="cut_direction", title="Direction of cut",
        input_type=InputType.SELECT, field_type=FieldType.STR,
        select_provider="cut_direction_options", order_num=30,
        options={
            "inline_select_provider": True,
            "select_options": [[op, op] for op in ["<", "<=", ">=", ">"]]
        })

    es = OutputBlockField(name="es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(FeatureSelectionByCut, self).__init__(*args, **kwargs)
        self.celery_task = None

    @property
    def cut_property_options(self):
        """Headers of the bound rank table, shaped for a select widget.

        Falls through to an implicit None while the rank table input is unbound
        or lacks a `headers` attribute.
        """
        table = self.get_input_var("rank_table")
        if table and hasattr(table, "headers"):
            return [{"pk": header, "str": header} for header in table.headers]

    def execute(self, exp, *args, **kwargs):
        """Queue the feature-selection task with the configured cut."""
        self.clean_errors()
        signature = wrapper_task.s(
            feature_selection_by_cut,
            exp=exp,
            block=self,
            src_es=self.get_input_var("es"),
            rank_table=self.get_input_var("rank_table"),
            cut_property=self.cut_property,
            threshold=self.threshold,
            cut_direction=self.cut_direction,
            base_filename="%s_feature_selection" % self.uuid,
        )
        self.celery_task = signature
        exp.store_block(self)
        signature.apply_async()

    def success(self, exp, es):
        # Filtered ExpressionSet delivered by the worker.
        self.set_out_var("es", es)
        exp.store_block(self)
class NCF(GenericBlock):
    """Network-Constrained Forest classifier block.

    Trains/applies the "ncf" classifier on paired mRNA + miRNA train/test
    ExpressionSets, constrained by gene-gene and miRNA-gene interaction
    matrices. Work is dispatched to a celery worker; the ClassifierResult
    comes back through success().
    """
    block_group = GroupType.CLASSIFIER
    block_base_name = "NCF"
    name = "Network-Constrained Forest"
    classifier_name = "ncf"
    is_abstract = False
    is_block_supports_auto_execution = True

    # Block behavior
    _block_actions = ActionsList([])
    _block_actions.extend(save_params_actions_list)
    _block_actions.extend(execute_block_actions_list)

    gene2gene = InputBlockField(name="gene2gene", order_num=30,
                                required_data_type="BinaryInteraction",
                                required=True)
    miRNA2gene = InputBlockField(name="miRNA2gene", order_num=31,
                                 required_data_type="BinaryInteraction",
                                 required=True)

    # User defined parameters
    # Input ports definition
    _m_train_es = InputBlockField(name="mRNA_train_es", order_num=10,
                                  required_data_type="ExpressionSet",
                                  required=True)
    _m_test_es = InputBlockField(name="mRNA_test_es", order_num=20,
                                 required_data_type="ExpressionSet",
                                 required=True)
    _mi_train_es = InputBlockField(name="miRNA_train_es", order_num=21,
                                   required_data_type="ExpressionSet",
                                   required=True)
    _mi_test_es = InputBlockField(name="miRNA_test_es", order_num=22,
                                  required_data_type="ExpressionSet",
                                  required=True)

    # Provided outputs
    _result = OutputBlockField(name="result", field_type=FieldType.CUSTOM,
                               provided_data_type="ClassifierResult",
                               init_val=None)

    n_estimators = ParamField(name="n_estimators",
                              title="The number of trees in the forest",
                              input_type=InputType.TEXT,
                              field_type=FieldType.INT,
                              init_val="1000", order_num=41)
    walk_max_length = ParamField(name="walk_max_length",
                                 title="Walk max length",
                                 input_type=InputType.TEXT,
                                 field_type=FieldType.INT,
                                 init_val="10", order_num=50)
    criterion = ParamField(
        name="criterion",
        title="The function to measure the quality of a split",
        input_type=InputType.SELECT, field_type=FieldType.STR,
        order_num=60,
        options={
            "inline_select_provider": True,
            "select_options": [["gini", "Gini impurity"],
                               ["entropy", "Information gain"]]
        })
    eps = ParamField(name="eps", title="Eps",
                     input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                     init_val="0.01", order_num=70)
    max_depth = ParamField(name="max_depth",
                           title="The maximum depth of the tree",
                           input_type=InputType.TEXT, field_type=FieldType.INT,
                           init_val="2", order_num=80)
    min_samples_split = ParamField(
        name="min_samples_split",
        title="The minimum number of samples to split an internal node",
        input_type=InputType.TEXT, field_type=FieldType.INT,
        init_val="2", order_num=90,
    )
    min_samples_leaf = ParamField(
        name="min_samples_leaf",
        title="The minimum number of samples to be at a leaf node",
        input_type=InputType.TEXT, field_type=FieldType.INT,
        init_val="2", order_num=100)
    bootstrap = ParamField(name="bootstrap", title="bootstrap",
                           input_type=InputType.CHECKBOX,
                           field_type=FieldType.BOOLEAN,
                           required=False, order_num=110)

    def __init__(self, *args, **kwargs):
        super(NCF, self).__init__(*args, **kwargs)
        self.celery_task = None
        # Options forwarded verbatim to the classifier / its fit() call.
        self.classifier_options = {}
        self.fit_options = {}

    def execute(self, exp, *args, **kwargs):
        """Collect options and queue the NCF classification task."""
        self.set_out_var("result", None)
        self.collect_options()

        mRNA_train_es = self.get_input_var("mRNA_train_es")
        mRNA_test_es = self.get_input_var("mRNA_test_es")
        miRNA_train_es = self.get_input_var("miRNA_train_es")
        miRNA_test_es = self.get_input_var("miRNA_test_es")

        self.celery_task = wrapper_task.s(
            apply_ncf_classifier,
            exp=exp, block=self,
            mRNA_train_es=mRNA_train_es,
            mRNA_test_es=mRNA_test_es,
            miRNA_train_es=miRNA_train_es,
            miRNA_test_es=miRNA_test_es,
            classifier_name=self.classifier_name,
            classifier_options=self.classifier_options,
            fit_options=self.fit_options,
            base_folder=exp.get_data_folder(),
            base_filename="%s_%s" % (self.uuid, self.classifier_name),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        # We store obtained result as an output variable
        self.set_out_var("result", result)
        exp.store_block(self)

    def reset_execution(self, exp, *args, **kwargs):
        """Clear errors and the previous result so the block can be re-run."""
        self.clean_errors()
        # self.get_scope().remove_temp_vars()
        self.set_out_var("result", None)
        exp.store_block(self)

    def get_option_safe(self, name, target_type=None):
        """Return attribute `name`, optionally coerced to `target_type`.

        Missing, falsy, or un-coercible values all yield None so callers can
        treat "not set" uniformly.
        """
        if hasattr(self, name):
            raw = getattr(self, name)
            if raw:
                if target_type:
                    try:
                        return target_type(raw)
                    # BUGFIX: was a bare `except:` which also swallowed
                    # KeyboardInterrupt/SystemExit; coercion can only raise
                    # TypeError or ValueError here.
                    except (TypeError, ValueError):
                        pass
                else:
                    return raw
        return None

    def collect_option_safe(self, name, target_type=None, target_name=None):
        """Copy option `name` into classifier_options if it has a value.

        @param target_name: key to store under (defaults to `name`).
        @return: the collected value, or None if unset.
        """
        value = self.get_option_safe(name, target_type)
        # from celery.contrib import rdb; rdb.set_trace()
        if value:
            if target_name:
                self.classifier_options[target_name] = value
            else:
                self.classifier_options[name] = value
        return value

    def collect_options(self):
        """Assemble classifier_options from inputs and user parameters."""
        self.classifier_options["gene2gene"] = self.get_input_var("gene2gene")
        self.classifier_options["miRNA2gene"] = self.get_input_var(
            "miRNA2gene")
        # NOTE(review): range() excludes the upper bound, so walk lengths run
        # 1..walk_max_length-1; confirm whether the max should be inclusive
        # (range(1, int(self.walk_max_length) + 1)).
        self.classifier_options['walk_lengths'] = range(
            1, int(self.walk_max_length))
        self.collect_option_safe("eps")
        self.collect_option_safe("n_estimators", int)
        # self.collect_option_safe("max_features")
        self.collect_option_safe("max_depth", int)
        self.collect_option_safe("min_samples_leaf", int)
        self.collect_option_safe("min_samples_split", int)
        # bootstrap is a checkbox: False is meaningful, so it bypasses the
        # truthiness filter of collect_option_safe.
        self.classifier_options["bootstrap"] = self.bootstrap
class UserUpload(GenericBlock):
    """Abstract block for uploading a user-provided expression set + phenotype.

    The user supplies an assay matrix (optionally transposed) and a phenotype
    matrix; process_upload() assembles them into an ExpressionSet output.
    """
    block_base_name = "UPLOAD"
    block_group = GroupType.INPUT_DATA
    is_abstract = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data",
                     reload_block_in_client=True),
        ActionRecord("success", ["processing_upload"], "done",
                     reload_block_in_client=True),
        ActionRecord("error", ["processing_upload"], "valid_params",
                     reload_block_in_client=True),
    ])

    es_matrix = ParamField("es_matrix", title="Expression set matrix",
                           order_num=0,
                           input_type=InputType.FILE_INPUT,
                           field_type=FieldType.CUSTOM)
    es_matrix_ori = ParamField(
        "es_matrix_ori",
        title="Matrix orientation",
        order_num=1,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"]
            ]
        }
    )
    pheno_matrix = ParamField("pheno_matrix", title="Phenotype matrix",
                              order_num=10,
                              input_type=InputType.FILE_INPUT,
                              field_type=FieldType.CUSTOM)
    gpl_platform = ParamField("gpl_platform", title="Platform ID",
                              order_num=20,
                              input_type=InputType.TEXT,
                              field_type=FieldType.STR, required=False)
    working_unit = ParamField("working_unit",
                              title="Working unit [used when platform is unknown]",
                              order_num=3,
                              input_type=InputType.TEXT,
                              field_type=FieldType.STR, required=False)

    # TODO: add sub page field
    # pages = BlockField("pages", FieldType.RAW, init_val={
    #     "assign_sample_classes": {
    #         "title": "Assign sample classes",
    #         "resource": "assign_sample_classes",
    #         "widget": "widgets/fetch_gse/assign_sample_classes.html"
    #     },
    # })
    _is_sub_pages_visible = BlockField("is_sub_pages_visible",
                                       FieldType.RAW, is_a_property=True)

    ### PARAMETERS
    _expression_set = OutputBlockField(name="expression_set",
                                       field_type=FieldType.HIDDEN,
                                       provided_data_type="ExpressionSet")
    _gpl_annotation = OutputBlockField(name="gpl_annotation",
                                       field_type=FieldType.HIDDEN,
                                       provided_data_type="PlatformAnnotation")

    # TODO: COPY PASTE from fetch_gse block
    pages = BlockField("pages", FieldType.RAW, init_val={
        "assign_phenotype_classes": {
            "title": "Assign phenotype classes",
            "resource": "assign_phenotype_classes",
            "widget": "widgets/assign_phenotype_classes.html"
        },
    })

    def __init__(self, *args, **kwargs):
        super(UserUpload, self).__init__("User upload", *args, **kwargs)

    @property
    def is_sub_pages_visible(self):
        # Phenotype sub-page only makes sense once data is in place.
        if self.state in ['source_was_preprocessed', 'sample_classes_assigned',
                          'ready', 'done']:
            return True
        return False

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Serialize the stored ExpressionSet's phenotype for the JS client."""
        return prepare_phenotype_for_js_from_es(
            self.get_out_var("expression_set"))

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Store user-assigned sample classes posted from the client."""
        es = self.get_out_var("expression_set")
        pheno_df = es.get_pheno_data_frame()
        received = json.loads(request.body)
        es.pheno_metadata["user_class_title"] = received["user_class_title"]
        pheno_df[received["user_class_title"]] = received["classes"]
        es.store_pheno_data_frame(pheno_df)
        exp.store_block(self)

    def process_upload(self, exp, *args, **kwargs):
        """Build the ExpressionSet output from the uploaded matrices.

        @param exp: Experiment
        """
        self.clean_errors()
        # NOTE(review): DataFrame.from_csv is deprecated in modern pandas;
        # kept here because read_csv has different default parsing options.
        assay_df = pd.DataFrame.from_csv(self.es_matrix.get_file())

        es = ExpressionSet(base_dir=exp.get_data_folder(),
                           base_filename="%s_annotation" % self.uuid)

        pheno_df = pd.DataFrame.from_csv(self.pheno_matrix.get_file())
        # BUGFIX: set_index returns a new frame; the result was discarded
        # before, leaving the phenotype frame un-indexed.
        pheno_df = pheno_df.set_index(pheno_df.columns[0])

        user_class_title = es.pheno_metadata["user_class_title"]
        if user_class_title not in pheno_df.columns:
            # Placeholder column; filled in later by the class-assignment page.
            pheno_df[es.pheno_metadata["user_class_title"]] = ""

        # if matrix is bad oriented, then do transposition
        if self.es_matrix_ori == "GxS":
            assay_df = assay_df.T

        es.store_assay_data_frame(assay_df)
        es.store_pheno_data_frame(pheno_df)

        if self.working_unit:
            es.working_unit = self.working_unit

        self.set_out_var("expression_set", es)
        exp.store_block(self)
        self.do_action("success", exp)
        # self.celery_task_fetch.apply_async()

    def success(self, exp, *args, **kwargs):
        pass
class UploadInteraction(GenericBlock):
    """Upload a gene/miRNA interaction file and expose it as BinaryInteraction."""
    block_base_name = "GENE_INTERACTION"
    block_group = GroupType.INPUT_DATA
    name = "Upload Gene Interaction"

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data"),
        ActionRecord("success", ["processing_upload"], "done",
                     reload_block_in_client=True),
        ActionRecord("error", ["processing_upload"], "valid_params"),
    ])

    upload_interaction = ParamField("upload_interaction",
                                    title="Interaction file",
                                    order_num=10,
                                    input_type=InputType.FILE_INPUT,
                                    field_type=FieldType.CUSTOM)
    interaction_type = ParamField("interaction_type", title="Interaction type",
                                  order_num=11,
                                  input_type=InputType.SELECT,
                                  field_type=FieldType.STR,
                                  required=True, init_val="PPI",
                                  options={
                                      "inline_select_provider": True,
                                      "select_options": [["PPI", "PPI"],
                                                         ["miRNA", "miRNA Target"]]
                                  })
    x1_unit = ParamField("x1_unit", title="(x1, x2) - x1 unit",
                         order_num=12,
                         input_type=InputType.SELECT,
                         field_type=FieldType.STR,
                         required=True, init_val="RefSeq",
                         options={
                             "inline_select_provider": True,
                             "select_options": [["RefSeq", "RefSeq"],
                                                ["Entrez", "EntrezID"],
                                                ["Symbol", "Symbol"],
                                                ["mirbase", "miRBase ID"]]
                         })
    x2_unit = ParamField("x2_unit", title="(x1, x2) - x2 unit",
                         order_num=13,
                         input_type=InputType.SELECT,
                         field_type=FieldType.STR,
                         required=True, init_val="RefSeq",
                         options={
                             "inline_select_provider": True,
                             "select_options": [["RefSeq", "RefSeq"],
                                                ["Entrez", "EntrezID"],
                                                ["Symbol", "Symbol"],
                                                ["mirbase", "miRBase ID"]]
                         })
    header = ParamField("header", title="Header", order_num=23,
                        input_type=InputType.CHECKBOX,
                        field_type=FieldType.BOOLEAN, required=False)

    bi_data_type = ParamField(
        "bi_data_type",
        title="Data type",
        order_num=40,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="matrix",
        options={
            "inline_select_provider": True,
            "select_options": [
                # ["matrix", "Matrix"],
                ["pairs", "Pairs"],
                ["pairs_diff", "Pairs - different units in interaction"],
                ["triples", "Triples with values"],
                [
                    "triples_diff",
                    "Triples with values - different units in interaction"
                ]
            ]
        })

    csv_sep = ParamField(
        "csv_sep", title="CSV separator symbol", order_num=50,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    _interaction = OutputBlockField(name="interaction",
                                    provided_data_type="BinaryInteraction")

    def move_to_exp(self, exp_id):
        # NOTE(review): fetches the output but does nothing with it —
        # looks like an unfinished migration hook; behavior preserved.
        interaction = self.get_out_var("interaction")

    def __init__(self, *args, **kwargs):
        super(UploadInteraction, self).__init__(*args, **kwargs)
        self.celery_task = None

    def process_upload(self, exp, *args, **kwargs):
        """Queue parsing of the uploaded interaction file."""
        self.clean_errors()
        signature = wrapper_task.s(upload_interaction_task, exp, self)
        self.celery_task = signature
        exp.store_block(self)
        signature.apply_async()

    def success(self, exp, interaction):
        # Parsed BinaryInteraction delivered by the worker.
        self.set_out_var("interaction", interaction)
        exp.store_block(self)
class UserUploadComplex(GenericBlock):
    """Upload a combined mRNA / miRNA / methylation dataset with shared phenotype.

    Each provided matrix becomes its own ExpressionSet output; all share the
    same phenotype data frame.
    """
    block_base_name = "UPLOAD_CMPLX"
    block_group = GroupType.INPUT_DATA
    name = "Upload mRna/miRna/methyl dataset"

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data"),
        ActionRecord("success", ["processing_upload"], "done",
                     reload_block_in_client=True),
        ActionRecord("error", ["processing_upload"], "valid_params"),
    ])

    m_rna_matrix = ParamField("m_rna_matrix", title="mRNA expression",
                              order_num=10,
                              input_type=InputType.FILE_INPUT,
                              field_type=FieldType.CUSTOM)
    m_rna_platform = ParamField("m_rna_platform", title="Platform ID",
                                order_num=11,
                                input_type=InputType.TEXT,
                                field_type=FieldType.STR, required=False)
    m_rna_unit = ParamField("m_rna_unit",
                            title="Working unit [used when platform is unknown]",
                            init_val=None,
                            order_num=12,
                            input_type=InputType.TEXT,
                            field_type=FieldType.STR, required=False)

    mi_rna_matrix = ParamField("mi_rna_matrix", title=u"μRNA expression",
                               order_num=20,
                               input_type=InputType.FILE_INPUT,
                               field_type=FieldType.CUSTOM, required=False)
    methyl_matrix = ParamField("methyl_matrix", title="Methylation expression",
                               order_num=30,
                               input_type=InputType.FILE_INPUT,
                               field_type=FieldType.CUSTOM, required=False)
    pheno_matrix = ParamField("pheno_matrix", title="Phenotype matrix",
                              order_num=40,
                              input_type=InputType.FILE_INPUT,
                              field_type=FieldType.CUSTOM, required=False)

    csv_sep = ParamField(
        "csv_sep", title="CSV separator symbol", order_num=50,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    _is_sub_pages_visible = BlockField("is_sub_pages_visible",
                                       FieldType.RAW, is_a_property=True)

    _m_rna_es = OutputBlockField(name="m_rna_es",
                                 field_type=FieldType.HIDDEN,
                                 provided_data_type="ExpressionSet")
    _m_rna_annotation = OutputBlockField(
        name="m_rna_annotation",
        field_type=FieldType.HIDDEN,
        provided_data_type="PlatformAnnotation")
    _mi_rna_es = OutputBlockField(name="mi_rna_es",
                                  field_type=FieldType.HIDDEN,
                                  provided_data_type="ExpressionSet")
    _methyl_es = OutputBlockField(name="methyl_es",
                                  field_type=FieldType.HIDDEN,
                                  provided_data_type="ExpressionSet")

    pages = BlockField("pages", FieldType.RAW, init_val={
        "assign_phenotype_classes": {
            "title": "Assign phenotype classes",
            "resource": "assign_phenotype_classes",
            "widget": "widgets/assign_phenotype_classes.html"
        },
    })

    @property
    def is_sub_pages_visible(self):
        if self.state in [
            'source_was_preprocessed', 'sample_classes_assigned',
            'ready', 'done'
        ]:
            return True
        return False

    def _first_available_es(self):
        """Return the first stored ExpressionSet (mRNA, then miRNA, then methyl).

        @raise Exception: if no dataset has been stored yet.
        """
        for out_name in ("m_rna_es", "mi_rna_es", "methyl_es"):
            es = self.get_out_var(out_name)
            if es is not None:
                return es
        raise Exception("No data was stored before")

    def process_upload(self, exp, *args, **kwargs):
        """Parse the uploaded matrices into ExpressionSet outputs.

        @param exp: Experiment
        """
        # TODO: move to celery
        self.clean_errors()
        sep = getattr(self, "csv_sep", " ")

        try:
            if not self.pheno_matrix:
                self.warnings.append(Exception("Phenotype is undefined"))
                pheno_df = None
            else:
                pheno_df = self.pheno_matrix.get_as_data_frame(sep)
                # BUGFIX: set_index returns a new frame; result was discarded.
                pheno_df = pheno_df.set_index(pheno_df.columns[0])
                # TODO: solve somehow better: Here we add empty column with user class assignment
                pheno_df[ExpressionSet(None, None)
                         .pheno_metadata["user_class_title"]] = ""

            if self.m_rna_matrix is not None:
                m_rna_assay_df = self.m_rna_matrix.get_as_data_frame(sep)
                m_rna_es = ExpressionSet(base_dir=exp.get_data_folder(),
                                         base_filename="%s_m_rna_es" % self.uuid)
                m_rna_es.store_assay_data_frame(m_rna_assay_df)
                m_rna_es.store_pheno_data_frame(pheno_df)
                m_rna_es.working_unit = self.m_rna_unit

                self.set_out_var("m_rna_es", m_rna_es)
                # TODO: fetch GPL annotation if GPL id was provided

            if self.mi_rna_matrix is not None:
                mi_rna_assay_df = self.mi_rna_matrix.get_as_data_frame(sep)
                mi_rna_es = ExpressionSet(base_dir=exp.get_data_folder(),
                                          base_filename="%s_mi_rna_es" % self.uuid)
                mi_rna_es.store_assay_data_frame(mi_rna_assay_df)
                mi_rna_es.store_pheno_data_frame(pheno_df)
                self.set_out_var("mi_rna_es", mi_rna_es)

            if self.methyl_matrix is not None:
                methyl_assay_df = self.methyl_matrix.get_as_data_frame(sep)
                methyl_es = ExpressionSet(base_dir=exp.get_data_folder(),
                                          base_filename="%s_methyl_es" % self.uuid)
                methyl_es.store_assay_data_frame(methyl_assay_df)
                methyl_es.store_pheno_data_frame(pheno_df)
                self.set_out_var("methyl_es", methyl_es)

            self.do_action("success", exp)
        except Exception as e:
            ex_type, ex, tb = sys.exc_info()
            traceback.print_tb(tb)
            self.do_action("error", exp, e)
        # self.celery_task_fetch.apply_async()

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Serialize the shared phenotype (from any stored set) for the client."""
        return prepare_phenotype_for_js_from_es(self._first_available_es())

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Apply user class assignments to every stored ExpressionSet."""
        m_rna_es = self.get_out_var("m_rna_es")
        mi_rna_es = self.get_out_var("mi_rna_es")
        methyl_es = self.get_out_var("methyl_es")
        es = self._first_available_es()

        pheno_df = es.get_pheno_data_frame()
        received = json.loads(request.body)
        pheno_df[received["user_class_title"]] = received["classes"]

        # Phenotype is shared: propagate the updated frame to every stored set.
        for work_es in [m_rna_es, mi_rna_es, methyl_es]:
            if work_es is not None:
                work_es.pheno_metadata["user_class_title"] = received[
                    "user_class_title"]
                work_es.store_pheno_data_frame(pheno_df)
        # import ipdb; ipdb.set_trace()
        exp.store_block(self)

    def success(self, exp, *args, **kwargs):
        pass
class FetchGSE(GenericBlock):
    """Fetch a GSE dataset from NCBI GEO, preprocess it, and expose it as an
    ExpressionSet output. Fetch and preprocess both run as celery tasks and
    report back through the success/error actions below.
    """
    block_base_name = "FETCH_GEO"
    name = "Fetch from NCBI GEO"
    block_group = GroupType.INPUT_DATA

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord("start_fetch", ["valid_params", "done"],
                     "source_is_being_fetched", "Start fetch"),
        # NOTE(review): target state "form_valid" is not a source state of any
        # other action here (others use "valid_params") — a fetch error may
        # leave the block stuck; confirm intended state name.
        ActionRecord("error_during_fetch", ["source_is_being_fetched"],
                     "form_valid", reload_block_in_client=True),
        ActionRecord("successful_fetch", ["source_is_being_fetched"],
                     "source_was_fetched", reload_block_in_client=True),

        ActionRecord("start_preprocess",
                     ["source_was_fetched", "source_was_preprocessed"],
                     "source_is_being_fetched", "Run preprocess"),
        ActionRecord("error_during_preprocess", ["source_is_being_fetched"],
                     "source_was_fetched", reload_block_in_client=True),
        ActionRecord("successful_preprocess", ["source_is_being_fetched"],
                     "source_was_preprocessed", reload_block_in_client=True),

        ActionRecord("assign_sample_classes",
                     ["source_was_preprocessed", "done"], "done"),
    ])

    # Raw SOFT file downloaded from GEO; set by successful_fetch().
    source_file = BlockField("source_file", FieldType.CUSTOM, None)

    pages = BlockField("pages", FieldType.RAW, init_val={
        "assign_phenotype_classes": {
            "title": "Assign phenotype classes",
            "resource": "assign_phenotype_classes",
            "widget": "widgets/assign_phenotype_classes.html"
        },
    })
    _is_sub_pages_visible = BlockField("is_sub_pages_visible",
                                       FieldType.RAW, is_a_property=True)

    ### PARAMETERS
    geo_uid = ParamField("geo_uid", "Geo accession id",
                         InputType.TEXT, FieldType.STR, "")

    _expression_set = OutputBlockField(name="expression_set",
                                       field_type=FieldType.HIDDEN,
                                       provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        #"Fetch ncbi gse",
        super(FetchGSE, self).__init__(*args, **kwargs)
        # Handles to the two async stages; populated when each stage starts.
        self.celery_task_fetch = None
        self.celery_task_preprocess = None

    def is_form_fields_editable(self):
        """Form is editable only before any fetch has been started."""
        if self.state in ['created', 'form_modified']:
            return True
        return False

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Serialize the fetched phenotype for the JS client, with GEO-specific
        header renaming/ordering/hiding rules.
        """
        headers_options = {
            # (prefix, replacement) pairs applied to column titles.
            "custom_title_prefix_map": [
                ("Sample_title", "Title"),
                ("Sample_description", "Description"),
                ("Sample_characteristics", "Characteristics"),
                ("Sample_organism", "Organism"),
                ("Sample_geo_accession", "GEO #"),
                ("Sample_", ""),
            ],
            "prefix_order": [
                "Sample_geo_accession",
                "Sample_title",
                "Sample_description",
                "Sample_contact",
                "Sample_characteristics",
            ],
            # Columns with these prefixes are hidden from the client entirely.
            "prefix_hide": {
                "Sample_contact",
                "Sample_channel",
                "Sample_data_row_count",
                "Sample_data",
                "Sample_platform",
                "Sample_growth",
                "Sample_series_id",
                "Sample_status",
                "Sample_extract",
                "Sample_supplementary_file",
                "Sample_hyb",
                "Sample_label",
                "Sample_source",
                "Sample_last_update",
                "Sample_molecule",
                "Sample_organism",
                "Sample_scan",
                "Sample_taxid",
                "Sample_type",
                "Sample_submission",
            }
        }
        return prepare_phenotype_for_js_from_es(
            self.get_out_var("expression_set"), headers_options)

    @property
    def is_sub_pages_visible(self):
        # Class-assignment sub-page appears only once preprocessing succeeded.
        if self.state in [
            'source_was_preprocessed', 'sample_classes_assigned', 'ready'
        ]:
            return True
        return False

    def start_fetch(self, exp, *args, **kwargs):
        """Queue the GEO download task.

        @param exp: Experiment
        """
        self.clean_errors()
        self.celery_task_fetch = wrapper_task.s(
            fetch_geo_gse, exp, self,
            geo_uid=self.geo_uid,
            success_action="successful_fetch",
            error_action="error_during_fetch",
            ignore_cache=False)
        exp.store_block(self)
        self.celery_task_fetch.apply_async()

    def error_during_fetch(self, exp, *args, **kwargs):
        exp.store_block(self)

    def successful_fetch(self, exp, source_file, *args, **kwargs):
        # Remember the downloaded file and immediately chain preprocessing.
        self.clean_errors()
        self.source_file = source_file
        self.do_action("start_preprocess", exp)
        exp.store_block(self)

    def start_preprocess(self, exp, *args, **kwargs):
        """Queue SOFT-file preprocessing into an ExpressionSet."""
        self.celery_task_preprocess = wrapper_task.s(
            preprocess_soft, exp, self,
            source_file=self.source_file,
            success_action="successful_preprocess",
            error_action="error_during_preprocess")
        exp.store_block(self)
        self.celery_task_preprocess.apply_async()

    def error_during_preprocess(self, exp, *args, **kwargs):
        exp.store_block(self)

    def successful_preprocess(self, exp, es, *args, **kwargs):
        """Store the preprocessed ExpressionSet and notify the user.

        @type es: ExpressionSet
        @type ann: PlatformAnnotation
        """
        self.set_out_var("expression_set", es)
        # self.set_out_var("gpl_annotation", ann)
        self.clean_errors()
        exp.store_block(self)

        msg = BlockUpdated(self.exp_id, self.uuid, self.base_name)
        msg.comment = u"Dataset %s was preprocessed, \n please assign samples to classes" % self.geo_uid
        msg.silent = False
        msg.send()

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Persist user-assigned sample classes posted from the client."""
        #TODO: unify code with user upload
        es = self.get_out_var("expression_set")
        pheno_df = es.get_pheno_data_frame()
        received = json.loads(request.body)

        es.pheno_metadata["user_class_title"] = received["user_class_title"]
        pheno_df[received["user_class_title"]] = received["classes"]

        es.store_pheno_data_frame(pheno_df)
        exp.store_block(self)
        self.do_action("assign_sample_classes", exp)

    def assign_sample_classes(self, exp, *args, **kwargs):
        pass
class NIMFASNMNMFBlock(GenericBlock):
    """Sparse Network-regularized Multiple NMF (SNMNMF) via the nimfa library.

    Factorizes paired mRNA/miRNA expression matrices constrained by gene-gene
    and miRNA-gene interactions into W, H1 (miRNA) and H2 (genes) matrices.
    DNA-methylation inputs/outputs are currently disabled.
    """
    block_base_name = "NIMFA_SNMNMF"
    name = "NIMFA SNMNMF"
    is_abstract = False
    block_group = GroupType.SNMNMF

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _m_rna = InputBlockField(name="mRNA", order_num=10,
                             required_data_type="ExpressionSet", required=True)
    _mi_rna = InputBlockField(name="miRNA", order_num=20,
                              required_data_type="ExpressionSet", required=True)
    _gene2gene = InputBlockField(name="Gene2Gene", order_num=40,
                                 required_data_type="BinaryInteraction",
                                 required=True)
    _mirna2gene = InputBlockField(name="miRNA2gene", order_num=50,
                                  required_data_type="BinaryInteraction",
                                  required=True)

    # Regularization weights and factorization rank for SNMNMF.
    l1 = ParamField(name="l1", order_num=70, title="l1",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                    init_val=0.1)
    l2 = ParamField(name="l2", order_num=80, title="l2",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                    init_val=0.1)
    g1 = ParamField(name="g1", order_num=90, title="g1",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                    init_val=0.1)
    g2 = ParamField(name="g2", order_num=100, title="g2",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                    init_val=0.1)
    rank = ParamField(name="rank", order_num=110, title="rank",
                      input_type=InputType.TEXT, field_type=FieldType.INT,
                      init_val=50)

    w = OutputBlockField(name="W", provided_data_type="ExpressionSet")
    H1_miRNA = OutputBlockField(name="H1_miRNA",
                                provided_data_type="ExpressionSet")
    H2_genes = OutputBlockField(name="H2_genes",
                                provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(NIMFASNMNMFBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Queue the SNMNMF factorization task."""
        self.clean_errors()

        hyper_params = {
            'l1': self.l1,
            'l2': self.l2,
            'g1': self.g1,
            'g2': self.g2,
            'rank': self.rank,
        }
        signature = wrapper_task.s(
            nimfa_snmnmf_task,
            exp, self,
            mRNA=self.get_input_var("mRNA"),
            miRNA=self.get_input_var("miRNA"),
            gene2gene=self.get_input_var("Gene2Gene"),
            miRNA2gene=self.get_input_var("miRNA2gene"),
            params=hyper_params,
            base_filename="%s_nimfa_snmnmf" % self.uuid
        )
        self.celery_task = signature
        exp.store_block(self)
        signature.apply_async()

    def success(self, exp, W, H1, H2):
        # Publish the three factor matrices as outputs.
        self.set_out_var("W", W)
        self.set_out_var("H1_miRNA", H1)
        self.set_out_var("H2_genes", H2)
        exp.store_block(self)
class GenericRankingBlock(GenericBlock):
    """Abstract base for feature-ranking blocks.

    Subclasses set `ranking_name` and extend `collect_options`; this base
    wires the celery task and manages the TableResult output.
    """
    block_base_name = ""
    block_group = GroupType.PROCESSING
    is_abstract = True

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True
    )

    ## TODO: remove from generic ranking
    best = ParamField(
        name="best",
        title="Consider only best",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=None
    )

    _result = OutputBlockField(name="result", field_type=FieldType.STR,
                               provided_data_type="TableResult",
                               init_val=None)

    def __init__(self, *args, **kwargs):
        super(GenericRankingBlock, self).__init__(*args, **kwargs)
        # Filled in by concrete subclasses.
        self.ranking_name = None
        self.ranking_options = {}
        self.celery_task = None

        # Pre-allocate the result table so the output var exists immediately.
        owner_exp = Experiment.get_exp_by_id(self.exp_id)
        self.result = TableResult(
            base_dir=owner_exp.get_data_folder(),
            base_filename="%s_gt_result" % self.uuid,
        )
        self.set_out_var("result", self.result)

    def collect_options(self):
        # Subclass hook: populate ranking_options before execution.
        self.ranking_options = {}

    def execute(self, exp, *args, **kwargs):
        """Queue the ranking computation task."""
        self.clean_errors()
        self.collect_options()

        signature = wrapper_task.s(
            apply_ranking,
            exp=exp,
            block=self,
            es=self.get_input_var("es"),
            ranking_name=self.ranking_name,
            result_table=self.result,
            options=self.ranking_options
        )
        self.celery_task = signature
        exp.store_block(self)
        signature.apply_async()
        exp.log(self.uuid, "Sent ranking computation to queue")
        log.debug("Sent ranking computation to queue")

    def success(self, exp, result, *args, **kwargs):
        # Replace the placeholder table with the computed result.
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
class FilterBlock(GenericBlock):
    """Filters an ExpressionSet either by low values or by variance,
    delegating the work to the ``filter_task`` celery task.
    """
    block_base_name = "FILTER"
    name = "Var/Val Filter"

    is_abstract = False
    block_group = GroupType.FILTER
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es", order_num=10,
                          required_data_type="ExpressionSet", required=True)

    filter_method = ParamField(
        "filter_method", title="Filter method", order_num=50,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="LOW_VAL",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["LOW_VAL", "Low Val Filter"],
                ["VAR", "Var Filter"],
            ],
        })

    q = ParamField(name="q", title="Threshold", input_type=InputType.TEXT,
                   field_type=FieldType.FLOAT, init_val=30.0)

    flt_es = OutputBlockField(name="flt_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(FilterBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Sends the filtering job to the celery queue."""
        self.clean_errors()
        source_es = self.get_input_var("es")
        out_name = "%s_%s_flt" % (self.uuid, self.filter_method)
        self.celery_task = wrapper_task.s(
            filter_task, exp, self,
            filter_type=self.filter_method,
            q=self.q,
            es=source_es,
            base_filename=out_name,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publishes the filtered expression set produced by the task."""
        self.set_out_var("flt_es", flt_es)
        exp.store_block(self)
class MergeComoduleSets(GenericBlock):
    """Merges two comodule sets into a single one via ``merge_comodules_task``."""
    block_base_name = "MERGE_COMODULE_SETS"
    name = "Merge Comodule Sets"

    is_abstract = False
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _cs_1 = InputBlockField(name="cs_1", order_num=10,
                            required_data_type="ComoduleSet", required=True)
    _cs_1_name = ParamField(name="cs_1_name", order_num=11,
                            title="Comodule 1 name",
                            input_type=InputType.TEXT,
                            field_type=FieldType.STR, init_val="genes")
    _cs_2 = InputBlockField(name="cs_2", order_num=20,
                            required_data_type="ComoduleSet", required=True)
    _cs_2_name = ParamField(name="cs_2_name", order_num=21,
                            title="Comodule 2 name",
                            input_type=InputType.TEXT,
                            field_type=FieldType.STR, init_val="genes")

    # NOTE(review): attribute is called "flt_es" but the output variable is
    # "comodule_set" — kept as-is since the attribute name may be part of the
    # field-collection interface; confirm before renaming.
    flt_es = OutputBlockField(name="comodule_set", provided_data_type="ComoduleSet")

    def __init__(self, *args, **kwargs):
        super(MergeComoduleSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedules the merge of both input comodule sets on the celery queue."""
        self.clean_errors()
        first = self.get_input_var("cs_1")
        second = self.get_input_var("cs_2")
        self.celery_task = wrapper_task.s(
            merge_comodules_task, exp, self,
            cs_1=first,
            cs_2=second,
            cs_1_name=self.cs_1_name,
            cs_2_name=self.cs_2_name,
            base_filename="%s_%s_thr" % (self.uuid, 'merge_cs'),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Stores the merged comodule set as this block's output."""
        self.set_out_var("comodule_set", flt_es)
        exp.store_block(self)
class UniformMetaBlock(GenericBlock):
    """Abstract meta-block that runs a sub-scope once per fold/iteration and
    collects the declared output variables of each run into a result sequence.

    Subclasses provide the fold generation (see the abstract label getters);
    the state machine below drives: generate folds -> run sub-scope ->
    collect outputs -> advance iterator -> ... -> build result collection.
    """
    is_abstract = True
    block_group = GroupType.META_PLUGIN
    create_new_scope = True
    is_block_supports_auto_execution = True

    # State machine: actions move the block between named states; the
    # "*"-entries accept the action from any state.
    _block_actions = ActionsList([])
    _block_actions.extend(ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("add_collector_var", ["created", "ready", "done", "valid_params"],
                     "validating_params"),
        ActionRecord("remove_collector_var", ["created", "ready", "done", "valid_params"],
                     "validating_params"),

        ActionRecord("execute", ["ready"], "generating_folds", user_title="Run block"),

        ActionRecord("on_folds_generation_success", ["generating_folds"],
                     "ready_to_run_sub_scope", reload_block_in_client=True),
        ActionRecord("continue_collecting_sub_scope", ["ready_to_run_sub_scope"],
                     "sub_scope_executing"),

        ActionRecord("run_sub_scope", ["ready_to_run_sub_scope"], "sub_scope_executing"),
        ActionRecord("on_sub_scope_done", ["sub_scope_executing"], "ready_to_run_sub_scope"),

        ActionRecord("success", ["working", "ready_to_run_sub_scope"], "done",
                     propagate_auto_execution=True, reload_block_in_client=True),
        ActionRecord("error", ["*", "ready", "working", "sub_scope_executing",
                               "generating_folds", "ready_to_run_sub_scope"],
                     "execution_error", reload_block_in_client=True),

        ActionRecord("reset_execution", ["*", "done", "sub_scope_executing",
                                         "ready_to_run_sub_scope", "generating_folds",
                                         "execution_error"], "ready",
                     user_title="Reset execution"),
    ]))

    # Hidden custom parameter: which sub-scope variables to collect per fold.
    _collector_spec = ParamField(name="collector_spec", title="",
                                 field_type=FieldType.CUSTOM,
                                 input_type=InputType.HIDDEN,
                                 init_val=None, required=False
    )

    # Per-fold collected results; one "cell" (dict) per fold.
    res_seq = BlockField(name="res_seq", provided_data_type="SequenceContainer",
                         field_type=FieldType.HIDDEN, init_val=None)

    _results_container = OutputBlockField(
        name="results_container",
        provided_data_type="ResultsContainer",
        field_type=FieldType.HIDDEN,
        init_val=None
    )

    def __init__(self, *args, **kwargs):
        super(UniformMetaBlock, self).__init__(*args, **kwargs)
        # These states count as "working" for auto-execution bookkeeping.
        self.auto_exec_status_working.update(["sub_scope_executing",
                                              "ready_to_run_sub_scope",
                                              "generating_folds"])

        # Serves the current fold's inner outputs to blocks inside the sub-scope.
        self.inner_output_manager = IteratedInnerFieldManager()
        self.collector_spec = CollectorSpecification()
        self.collector_spec.label = self.block_base_name + "_collection"

        self.inner_output_es_names_map = {}
        self.celery_task = None

        self.set_out_var("results_container", None)
        self.res_seq = SequenceContainer()

    def remap_inputs(self, mapping):
        # Re-point bound input and collector variables after block UUIDs change
        # (e.g. when an experiment is cloned).
        for var in self.bound_inputs.itervalues():
            var.change_block(mapping)
        for var in self.collector_spec.bound.itervalues():
            var.change_block(mapping)

    @property
    def is_sub_pages_visible(self):
        # Sub-scope UI pages only make sense once parameters were accepted.
        if self.state in ['valid_params', 'done', 'ready']:
            return True
        return False

    @abstractmethod
    def get_fold_labels(self):
        pass

    @abstractmethod
    def get_repeat_labels(self):
        pass

    def get_inner_out_var(self, name):
        return self.inner_output_manager.get_var(name)

    def run_sub_scope(self, exp, *args, **kwargs):
        """Resets sub-blocks and executes the sub-scope for the current fold."""
        self.reset_execution_for_sub_blocks()

        cell = self.res_seq.sequence[self.inner_output_manager.iterator]
        log.debug("Cell!!!!!!!! %s", str(cell))
        act = self.inner_output_manager.sequence[self.inner_output_manager.iterator]
        log.debug("Cell!!!!!!!! %s", str(act))

        exp.store_block(self)
        sr = ScopeRunner(exp, self.sub_scope_name)
        sr.execute()

    def on_sub_scope_done(self, exp, *args, **kwargs):
        """
            @type exp: Experiment

            This action should be called by ScopeRunner
            when all blocks in sub-scope have exec status == done
        """
        # Redis lock guards the result sequence against concurrent fold
        # completions racing on the same block instance.
        r = get_redis_instance()
        with redis_lock.Lock(r, ExpKeys.get_block_global_lock_key(self.exp_id, self.uuid)):
            cell = self.res_seq.sequence[self.inner_output_manager.iterator]
            for name, scope_var in self.collector_spec.bound.iteritems():
                var = exp.get_scope_var_value(scope_var)
                exp.log(self.uuid, "Collected %s from %s" % (var, scope_var.title),
                        severity="CRITICAL")
                log.debug("Collected %s from %s", var, scope_var.title)
                if var is not None:
                    # Clone when possible so later folds can't mutate stored
                    # results; fall back to a deep copy otherwise.
                    if hasattr(var, "clone"):
                        cell[name] = var.clone("%s_%s" % (
                            self.uuid, self.inner_output_manager.iterator))
                    else:
                        cell[name] = deepcopy(var)

            self.res_seq.sequence[self.inner_output_manager.iterator] = cell
            exp.store_block(self)

        if len(cell) < len(self.res_seq.fields):
            # Not every declared collector variable was produced yet: keep
            # collecting within the same fold.
            self.do_action("continue_collecting_sub_scope", exp)
        else:
            try:
                self.inner_output_manager.next()
                self.do_action("run_sub_scope", exp)
            except StopIteration, e:
                # All folds were processed without errors
                self.build_result_collection(exp)
                self.do_action("success", exp)
class PatternEdges(GenericBlock):
    """Computes edges among the genes of each pattern (comodule) using a
    gene-gene interaction matrix, via the ``compute_edges`` celery task.

    Outputs the resulting edges and the differential-expression data the
    task produces alongside them.
    """
    block_base_name = "PA_EDGES"
    block_group = GroupType.PATTERN_SEARCH
    name = "Patterns Edges"

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_es = InputBlockField(name="es", order_num=10,
                                required_data_type="ExpressionSet", required=True)
    _input_comodule_set = InputBlockField(name="cs", order_num=20,
                                          required_data_type="GeneSets", required=True)
    _gene2gene = InputBlockField(name="gene2gene", order_num=30,
                                 required_data_type="BinaryInteraction", required=True)

    edges = OutputBlockField(name="edges", provided_data_type="Edges")
    diff_expr = OutputBlockField(name="diff_expr", provided_data_type="DiffExpr")

    def __init__(self, *args, **kwargs):
        # Consistent with the sibling processing blocks: make sure the task
        # handle exists even before execute() assigns it.
        super(PatternEdges, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedules the edge computation on the celery queue.

        @type exp: Experiment
        """
        self.clean_errors()
        cs = self.get_input_var("cs")          # :type: ComoduleSet
        es = self.get_input_var("es")          # :type: ExpressionSet
        gene2gene = self.get_input_var("gene2gene")  # :type: BinaryInteraction

        self.celery_task = wrapper_task.s(
            compute_edges,
            exp, self,
            m_rna_es=es,
            comodule_set=cs,
            gene2gene=gene2gene,
            base_filename="%s_pattern_edges" % self.uuid)
        exp.store_block(self)
        self.celery_task.apply_async()

    def process_upload(self, exp, *args, **kwargs):
        """
            @param exp: Experiment
        """
        try:
            self.do_action("success", exp)
        except Exception as e:
            # Surface the traceback for operator debugging, then transition
            # the block into its error state.
            ex_type, ex, tb = sys.exc_info()
            traceback.print_tb(tb)
            self.do_action("error", exp, e)

    def success(self, exp, edges, diff_expr):
        """Publishes both task outputs and persists the block."""
        self.set_out_var("edges", edges)
        self.set_out_var("diff_expr", diff_expr)
        exp.store_block(self)