예제 #1
0
class FilterByInteraction(GenericBlock):
    """Filter an mRNA and a miRNA expression set using an interaction input.

    Inputs: ``mRNA_es`` and ``miRNA_es`` (``ExpressionSet``) plus
    ``interaction`` (``BinaryInteraction``). Outputs: ``m_rna_filtered_es``
    and ``mi_rna_filtered_es`` (``ExpressionSet``). The work itself is done
    by ``filter_by_bi``, dispatched through ``wrapper_task``.
    """
    block_base_name = "FILTER_BY_BI"
    name = "Filter ES by interaction"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    # Parameter-validation transitions, extended below with the shared
    # execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports.
    _mRNA_es = InputBlockField(
        name="mRNA_es", order_num=10,
        required_data_type="ExpressionSet", required=True)
    _miRNA_es = InputBlockField(
        name="miRNA_es", order_num=20,
        required_data_type="ExpressionSet", required=True)
    _interaction = InputBlockField(
        name="interaction", order_num=30,
        required_data_type="BinaryInteraction", required=True)

    # Output ports.
    m_rna_filtered_es = OutputBlockField(
        name="m_rna_filtered_es", provided_data_type="ExpressionSet")
    mi_rna_filtered_es = OutputBlockField(
        name="mi_rna_filtered_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(FilterByInteraction, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Read the three inputs, store the block and dispatch the task.

        :param exp: experiment object used to persist the block and passed
            on to the task wrapper.
        """
        self.clean_errors()
        task_kwargs = {
            "m_rna_es": self.get_input_var("mRNA_es"),
            "mi_rna_es": self.get_input_var("miRNA_es"),
            "interaction_matrix": self.get_input_var("interaction"),
            "base_filename": "%s_filtered_by_BI" % self.uuid,
        }

        self.celery_task = wrapper_task.s(
            filter_by_bi, exp, self, **task_kwargs)
        # The block is stored before the task is sent off.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, m_rna_filtered_es, mi_rna_filtered_es):
        """Publish both filtered expression sets and store the block.

        Presumably invoked by the task wrapper once ``filter_by_bi``
        finishes -- confirm against ``wrapper_task``.
        """
        for out_name, value in (("m_rna_filtered_es", m_rna_filtered_es),
                                ("mi_rna_filtered_es", mi_rna_filtered_es)):
            self.set_out_var(out_name, value)
        exp.store_block(self)
예제 #2
0
class EnrichmentNoTBlock(GenericBlock):
    """Comodule enrichment block.

    Consumes two ``GeneSets`` inputs (``gs`` and ``patterns``) and a float
    parameter ``T`` titled "Enrichment threshold", and publishes a
    ``DictionarySet`` output named ``dictionary_set``. The computation is
    delegated to ``enrichment_no_t_task`` through ``wrapper_task``.
    """
    block_base_name = "ENRICHMENT_COM"
    name = "Comodule Enrichment"

    is_abstract = False
    block_group = GroupType.TESTING

    is_block_supports_auto_execution = True

    # Parameter-validation transitions, extended below with the shared
    # execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports.
    _cs_1 = InputBlockField(
        name="gs", order_num=10,
        required_data_type="GeneSets", required=True)
    H = InputBlockField(
        name="patterns", order_num=11,
        required_data_type="GeneSets", required=True)

    # User parameter; read as ``self.T`` in ``execute``.
    _t = ParamField(
        name="T", order_num=12, title="Enrichment threshold",
        input_type=InputType.TEXT, field_type=FieldType.FLOAT,
        init_val="0.05")

    # NOTE(review): this attribute name shadows the builtin ``dict`` inside
    # the class body; kept because renaming would change the class interface.
    dict = OutputBlockField(
        name="dictionary_set", provided_data_type="DictionarySet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(EnrichmentNoTBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Read both gene-set inputs, store the block and dispatch the task.

        :param exp: experiment object used to persist the block and passed
            on to the task wrapper.
        """
        self.clean_errors()
        gene_sets = self.get_input_var("gs")
        patterns = self.get_input_var("patterns")
        threshold = self.T
        out_basename = "%s_%s_enrich" % (self.uuid, 'enrichment_cont')

        self.celery_task = wrapper_task.s(
            enrichment_no_t_task, exp, self,
            T=threshold,
            gs=gene_sets,
            patterns=patterns,
            base_filename=out_basename)
        # The block is stored before the task is sent off.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish ``flt_es`` as the ``dictionary_set`` output and store."""
        self.set_out_var("dictionary_set", flt_es)
        exp.store_block(self)
예제 #3
0
class MergeGeneSetWithPlatformAnnotation(GenericBlock):
    """Merge a gene set with a platform annotation.

    Consumes a ``GeneSets`` input (``gs``) and a ``PlatformAnnotation``
    input (``ann``) and publishes a ``GeneSets`` output named ``gs``. The
    computation is delegated to ``map_gene_sets_to_probes`` through
    ``wrapper_task``.
    """
    block_base_name = "MERGE_GS_GPL_ANN"
    name = "Merge gene set with platform"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    # Parameter-validation transitions, extended below with the shared
    # execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports.
    _input_gs = InputBlockField(
        name="gs", order_num=10,
        required_data_type="GeneSets", required=True)
    _input_ann = InputBlockField(
        name="ann", order_num=20,
        required_data_type="PlatformAnnotation", required=True)

    # Output port (shares the name "gs" with the input port).
    _gs = OutputBlockField(
        name="gs", field_type=FieldType.HIDDEN,
        init_val=None, provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(MergeGeneSetWithPlatformAnnotation, self).__init__(
            *args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Read both inputs, store the block and dispatch the mapping task.

        :param exp: experiment object; supplies the data folder, persists
            the block and is passed on to the task wrapper.
        """
        self.clean_errors()
        source_gene_sets = self.get_input_var("gs")
        annotation = self.get_input_var("ann")

        self.celery_task = wrapper_task.s(
            map_gene_sets_to_probes, exp, self,
            base_dir=exp.get_data_folder(),
            base_filename="%s_merged" % self.uuid,
            ann_gene_sets=annotation.gene_sets,
            src_gene_sets=source_gene_sets)
        # The block is stored before the task is sent off.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Publish ``gs`` as the ``gs`` output and store the block."""
        self.set_out_var("gs", gs)
        exp.store_block(self)
예제 #4
0
class SvdSubAgg(GenericBlock):
    """Abstract base for aggregation blocks driven by ``aggregation_task``.

    Consumes two ``ExpressionSet`` inputs (``mRNA_es``, ``miRNA_es``) and a
    ``BinaryInteraction`` input (``interaction``), exposes a float parameter
    ``c`` and publishes an ``ExpressionSet`` output named ``agg_es``. The
    class attribute ``mode`` is forwarded to the task and is empty here;
    presumably subclasses override it -- confirm against the subclasses.
    """
    is_abstract = True
    block_group = GroupType.AGGREGATION

    is_block_supports_auto_execution = True

    # Parameter-validation transitions, extended below with the shared
    # execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports.
    _mRNA_es = InputBlockField(
        name="mRNA_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True)
    _miRNA_es = InputBlockField(
        name="miRNA_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True)
    _interaction = InputBlockField(
        name="interaction",
        order_num=30,
        required_data_type="BinaryInteraction",
        required=True)

    # User parameter forwarded to the task as ``c``.
    c = ParamField(
        name="c",
        title="Constant c",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=1.0)

    agg_es = OutputBlockField(
        name="agg_es",
        provided_data_type="ExpressionSet")

    # Forwarded to the task and embedded in the output base filename.
    mode = ""

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(SvdSubAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Read the three inputs, store the block and dispatch the task.

        :param exp: experiment object used to persist the block and passed
            on to the task wrapper.
        """
        self.clean_errors()
        m_rna = self.get_input_var("mRNA_es")
        mi_rna = self.get_input_var("miRNA_es")
        interaction = self.get_input_var("interaction")

        task_kwargs = {
            "mode": self.mode,
            "c": self.c,
            "m_rna_es": m_rna,
            "mi_rna_es": mi_rna,
            "interaction_matrix": interaction,
            "base_filename": "%s_%s_agg" % (self.uuid, self.mode),
        }
        self.celery_task = wrapper_task.s(
            aggregation_task, exp, self, **task_kwargs)
        # The block is stored before the task is sent off.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        """Publish ``agg_es`` as the ``agg_es`` output and store the block."""
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
예제 #5
0
파일: globaltest.py 프로젝트: klema/miXGENE
class GlobalTest(GenericBlock):
    """Goeman global test block.

    Consumes an ``ExpressionSet`` input (``es``) and a ``GeneSets`` input
    (``gs``) and publishes a ``TableResult`` output named ``result``. The
    computation is delegated to ``global_test_task`` through
    ``wrapper_task``.
    """
    block_base_name = "GLOBAL_TEST"
    name = "Goeman global test"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    # Parameter-validation transitions, extended below with the shared
    # execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports.
    _input_es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True)
    _input_gs = InputBlockField(
        name="gs",
        order_num=20,
        required_data_type="GeneSets",
        required=True)

    # Output port.
    _result = OutputBlockField(
        name="result",
        field_type=FieldType.STR,
        provided_data_type="TableResult",
        init_val=None)

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["gt_result.html"])

    def __init__(self, *args, **kwargs):
        """Initialise the block and pre-create its result table.

        The ``TableResult`` is placed in the data folder of the experiment
        looked up through ``self.exp_id`` and is given its column headers
        here.
        """
        super(GlobalTest, self).__init__(*args, **kwargs)
        self.celery_task = None

        owner_exp = Experiment.get_exp_by_id(self.exp_id)
        self.result = TableResult(
            base_dir=owner_exp.get_data_folder(),
            base_filename="%s_gt_result" % self.uuid)
        self.result.headers = [
            'p-value',
            'Statistic',
            'Expected',
            'Std.dev',
            '#Cov',
        ]

    def execute(self, exp, *args, **kwargs):
        """Read both inputs, store the block and dispatch the task.

        :param exp: experiment object used to persist the block and passed
            on to the task wrapper.
        """
        self.clean_errors()
        expression_set = self.get_input_var("es")
        gene_sets = self.get_input_var("gs")
        self.celery_task = wrapper_task.s(
            global_test_task, exp, self,
            es=expression_set,
            gene_sets=gene_sets,
            table_result=self.result)
        # The block is stored before the task is sent off.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Keep ``result`` on the block, publish it and store the block."""
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
예제 #6
0
class MergeExpressionSets(GenericBlock):
    """Merge two expression sets by concatenation.

    Consumes two ``ExpressionSet`` inputs (``es_1``, ``es_2``) and publishes
    an ``ExpressionSet`` output named ``merged_es``. The computation is
    delegated to ``merge_two_es`` through ``wrapper_task``.
    """
    block_base_name = "MergeES"
    name = "Merge ES by concatenation"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    # Parameter-validation transitions, extended below with the shared
    # execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports.
    _es_1 = InputBlockField(
        name="es_1", title="Set 1", order_num=10,
        required_data_type="ExpressionSet", required=True)
    _es_2 = InputBlockField(
        name="es_2", title="Set 2", order_num=20,
        required_data_type="ExpressionSet", required=True)

    # Output port.
    merged_es = OutputBlockField(
        name="merged_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(MergeExpressionSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Read both inputs, store the block and dispatch the merge task.

        :param exp: experiment object used to persist the block and passed
            on to the task wrapper.
        """
        self.clean_errors()
        first_es = self.get_input_var("es_1")
        second_es = self.get_input_var("es_2")
        out_basename = "%s_merged" % self.uuid

        self.celery_task = wrapper_task.s(
            merge_two_es, exp, self,
            es_1=first_es,
            es_2=second_es,
            base_filename=out_basename)
        # The block is stored before the task is sent off.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Publish ``es`` as the ``merged_es`` output and store the block."""
        self.set_out_var("merged_es", es)
        exp.store_block(self)
예제 #7
0
파일: generic.py 프로젝트: strny007/miXGENE
    def add_dyn_input(self, exp, received_block, *args, **kwargs):
        """Add a new input port requested through a dynamic-port spec.

        The request is read from ``received_block["_add_dyn_port"]`` and is
        expected to carry two keys: ``"input"`` (name of the existing
        extensible input) and ``"new_port"`` (name for the port to create).
        The call is a silent no-op when the request is absent or empty, when
        either key is missing or empty, or when the named input is not known
        to the block serializer.

        :param exp: experiment object used to persist the block.
        :param received_block: dict-like payload carrying the request.
        :return: None
        """
        spec = received_block.get("_add_dyn_port")
        if not spec:
            return

        # ``.get`` makes a missing key behave like an empty value instead of
        # raising KeyError on a partially filled request.
        new_port_name = spec.get('new_port')
        dyn_port_name = spec.get('input')
        if not new_port_name or not dyn_port_name:
            return

        dyn_port = self._block_serializer.inputs.get(dyn_port_name)
        if not dyn_port:
            return

        # Order number is derived from the parent port's order number, plus
        # the count of ports already registered under it.
        order_num = 1000 + abs(dyn_port.order_num) * 10
        registered = getattr(self, dyn_port_name)
        if registered:
            order_num += len(registered)

        new_port = InputBlockField(
            name=new_port_name,
            required_data_type=dyn_port.required_data_type,
            order_num=order_num)

        self.add_input_port(new_port)
        # Re-read the attribute rather than reusing ``registered`` so the
        # append targets whatever list is current after ``add_input_port``.
        getattr(self, dyn_port_name).append(new_port_name)

        self.add_dyn_input_hook(exp, dyn_port, new_port)
        exp.store_block(self)
예제 #8
0
class ThresholdBlock(GenericBlock):
    """Threshold block.

    Consumes an ``ExpressionSet`` input (``es``) and a float parameter ``T``
    titled "Threshold", and publishes a ``GeneSets`` output named
    ``gene_sets``. The computation is delegated to ``threshold_task``
    through ``wrapper_task``.
    """
    block_base_name = "THRESHOLD"
    name = "Threshold"

    is_abstract = False
    block_group = GroupType.SNMNMF

    is_block_supports_auto_execution = True

    # Parameter-validation transitions, extended below with the shared
    # execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input port.
    _es = InputBlockField(
        name="es", order_num=10,
        required_data_type="ExpressionSet", required=True)

    # User parameter; read as ``self.T`` in ``execute``.
    t = ParamField(
        name="T", title="Threshold",
        input_type=InputType.TEXT, field_type=FieldType.FLOAT,
        init_val=0.1)

    # Output port.
    flt_es = OutputBlockField(name="gene_sets", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(ThresholdBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Read the input, store the block and dispatch the threshold task.

        :param exp: experiment object used to persist the block and passed
            on to the task wrapper.
        """
        self.clean_errors()
        expression_set = self.get_input_var("es")
        threshold = self.T
        out_basename = "%s_%s_thr" % (self.uuid, 'threshold')

        self.celery_task = wrapper_task.s(
            threshold_task, exp, self,
            es=expression_set,
            T=threshold,
            base_filename=out_basename)
        # The block is stored before the task is sent off.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish ``flt_es`` as the ``gene_sets`` output and store."""
        self.set_out_var("gene_sets", flt_es)
        exp.store_block(self)
예제 #9
0
class RcVisualizer(GenericBlock):
    """Abstract base for blocks that visualise a ``ResultsContainer``.

    Declares the ``results_container`` input, the ``rc`` and
    ``available_metrics`` property-backed fields, and a ``metric`` select
    parameter whose options are provided by ``available_metrics``. Auto
    execution is disabled for this block.
    """
    block_base_name = "RC_VIZUALIZER"
    is_block_supports_auto_execution = False
    block_group = GroupType.VISUALIZE
    is_abstract = True

    # Unlike the processing blocks, valid parameters lead to "input_bound";
    # "configure_table" then moves the block to "ready".
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "input_bound"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("configure_table", ["input_bound", "ready"], "ready"),
    ])

    results_container = InputBlockField(name="results_container",
                                        required_data_type="ResultsContainer",
                                        required=True,
                                        field_type=FieldType.CUSTOM)
    _rc = BlockField(name="rc",
                     field_type=FieldType.CUSTOM,
                     is_a_property=True)
    _available_metrics = BlockField(name="available_metrics",
                                    field_type=FieldType.RAW,
                                    is_a_property=True)

    metric = ParamField(name="metric",
                        title="Metric",
                        field_type=FieldType.STR,
                        input_type=InputType.SELECT,
                        select_provider="available_metrics")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no state beyond the base class is added."""
        super(RcVisualizer, self).__init__(*args, **kwargs)

    @property
    @log_timing
    def available_metrics(self):
        """Return the select options for the ``metric`` parameter.

        :return: list of ``{"pk": <metric key>, "str": <metric title>}``
            dicts, one per entry of ``metrics_dict`` whose
            ``produce_single_number`` is truthy; an empty list if building
            the list raises.
        """
        try:
            # ``items()`` and ``except ... as`` are valid on both Python 2.6+
            # and Python 3, unlike ``iteritems()`` / ``except X, e``.
            return [
                {"pk": metric_name, "str": metric.title}
                for metric_name, metric in metrics_dict.items()
                if metric.produce_single_number
            ]
        except Exception as e:
            # Best effort: log the failure and offer no options rather than
            # break the block.
            log.exception(e)
            return []
예제 #10
0
class ZScoreBlock(GenericBlock):
    """Z-score normalization block.

    Consumes an ``ExpressionSet`` input (``es``) and publishes an
    ``ExpressionSet`` output named ``flt_zscore_es``. The computation is
    delegated to ``zscore_task`` through ``wrapper_task``.
    """
    block_base_name = "ZSCORE_NORM"
    name = "Z-score Normalization"

    is_abstract = False
    block_group = GroupType.NORMALIZATION

    is_block_supports_auto_execution = True

    # Parameter-validation transitions, extended below with the shared
    # execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input port.
    _es = InputBlockField(
        name="es", order_num=10,
        required_data_type="ExpressionSet", required=True)

    # Output port.
    flt_es = OutputBlockField(
        name="flt_zscore_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(ZScoreBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Read the input, store the block and dispatch the z-score task.

        :param exp: experiment object used to persist the block and passed
            on to the task wrapper.
        """
        self.clean_errors()
        expression_set = self.get_input_var("es")
        out_basename = "%s_%s_flt" % (self.uuid, 'zscore')

        self.celery_task = wrapper_task.s(
            zscore_task, exp, self,
            es=expression_set,
            base_filename=out_basename)
        # The block is stored before the task is sent off.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish ``flt_es`` as the ``flt_zscore_es`` output and store."""
        self.set_out_var("flt_zscore_es", flt_es)
        exp.store_block(self)
예제 #11
0
    def add_cell(self, exp, received_block, *args, **kwargs):
        """Append a new cell, creating one input port per prototype field.

        The request is read from ``received_block["cells"]["new"]``; when it
        is missing or empty nothing happens. Otherwise a ``CellInfo`` is
        built from the request's ``"label"``, a required input port is added
        for every field in ``self.cells_prototype.cells_list`` (named
        ``<prototype name>_<current cell count>``), and the block is stored.

        :param exp: experiment object used to persist the block.
        :param received_block: dict-like payload carrying the request.
        :return: None
        """
        cell_request = received_block.get("cells", {}).get("new")
        if not cell_request:
            return

        cell = CellInfo(cell_request["label"])
        for prototype in self.cells_prototype.cells_list:
            port_name = "%s_%s" % (prototype.name, len(self.cells.cells))
            # Record the prototype-name -> port-name pairing on the cell
            # before the port itself is registered on the block.
            cell.inputs_list.append((prototype.name, port_name))
            self.add_input_port(InputBlockField(
                name=port_name,
                required_data_type=prototype.data_type,
                required=True
            ))

        self.cells.cells.append(cell)
        exp.store_block(self)
예제 #12
0
class CrossValidation(UniformMetaBlock):
    """K-fold cross-validation meta block.

    Takes an extensible set of ``ExpressionSet`` inputs (``es_inputs``) and
    two integer parameters, ``folds_num`` and ``repeats_num``. Fold
    generation is delegated to ``generate_cv_folds`` through
    ``wrapper_task``; ``build_result_collection`` assembles the per-fold
    results into a single ``ResultsContainer``.

    Attributes such as ``inner_output_manager``, ``inner_output_es_names_map``,
    ``res_seq``, ``collector_spec`` and ``base_name`` are used here but not
    defined here; presumably they come from ``UniformMetaBlock`` -- confirm.
    """
    block_base_name = "CROSS_VALID"
    name = "Cross Validation K-fold"

    _cv_actions = ActionsList(
        [ActionRecord("become_ready", ["valid_params"], "ready")])
    elements = BlockField(name="elements",
                          field_type=FieldType.SIMPLE_LIST,
                          init_val=["cv_info.html"])

    _input_es_dyn = InputBlockField(name="es_inputs",
                                    required_data_type="ExpressionSet",
                                    required=True,
                                    multiply_extensible=True,
                                    order_num=-1)

    folds_num = ParamField(name="folds_num",
                           title="Folds number",
                           order_num=10,
                           input_type=InputType.TEXT,
                           field_type=FieldType.INT,
                           init_val=5)
    repeats_num = ParamField(name="repeats_num",
                             title="Repeats number",
                             order_num=20,
                             input_type=InputType.TEXT,
                             field_type=FieldType.INT,
                             init_val=1)

    def get_fold_labels(self):
        """Return one ``"fold_<repeat>_<fold>"`` label per fold.

        Labels are 1-based and ordered repeat-major: all folds of repeat 1,
        then all folds of repeat 2, and so on.
        """
        return [
            "fold_%s_%s" % (repeat + 1, num + 1)
            for repeat in range(self.repeats_num)
            for num in range(self.folds_num)
        ]

    def get_repeat_labels(self):
        """Return one 1-based ``"repeat_<n>"`` label per repeat."""
        return [
            "repeat_%s" % (repeat + 1) for repeat in range(self.repeats_num)
        ]

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register train/test inner outputs for a newly added input port.

        Two ``InnerOutputField`` objects named ``<port>_train_i`` and
        ``<port>_test_i`` are created with the port's data type, their names
        are recorded in ``inner_output_es_names_map`` under the port name,
        and both are registered as inner output variables.

        @type new_port: InputBlockField
        """
        new_inner_output_train = InnerOutputField(
            name="%s_train_i" % new_port.name,
            provided_data_type=new_port.required_data_type)
        new_inner_output_test = InnerOutputField(
            name="%s_test_i" % new_port.name,
            provided_data_type=new_port.required_data_type)
        self.inner_output_es_names_map[new_port.name] = \
            (new_inner_output_train.name, new_inner_output_test.name)

        self.register_inner_output_variables(
            [new_inner_output_train, new_inner_output_test])

    def execute(self, exp, *args, **kwargs):
        """Reset inner outputs, store the block and dispatch fold generation.

        :param exp: experiment object used to persist the block and passed
            on to the task wrapper.
        """
        self.clean_errors()

        self.inner_output_manager.reset()
        # One entry per dynamically added expression-set input.
        es_dict = {
            inp_name: self.get_input_var(inp_name)
            for inp_name in self.es_inputs
        }

        self.celery_task = wrapper_task.s(
            generate_cv_folds,
            exp,
            self,
            folds_num=self.folds_num,
            repeats_num=self.repeats_num,
            es_dict=es_dict,
            inner_output_es_names_map=self.inner_output_es_names_map,
            success_action="on_folds_generation_success",
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the parent handler, then trigger the ``become_ready`` action."""
        super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("become_ready", exp)

    def become_ready(self, *args, **kwargs):
        """Handler for the ``become_ready`` action; does no work itself."""
        pass

    def build_result_collection(self, exp):
        """Assemble collected results into one stored ``ResultsContainer``.

        The data type of the first field in ``self.res_seq.fields`` selects
        the strategy (results are assumed homogeneous):

        * ``"ClassifierResult"`` -- for every field, cells are grouped into
          runs of ``folds_num`` consecutive cells, runs are stacked along a
          repeat axis, and fields are stacked along the collector axis.
        * ``"ResultsContainer"`` -- the single field's containers are loaded
          and stacked along one axis labelled by ``get_fold_labels()``.
        * ``"SequenceContainer"`` -- nothing is added to the container.

        The container is then stored, its in-memory array is dropped, and it
        is published as the ``results_container`` output.

        :param exp: experiment object; supplies the data folder.
        :raises Exception: for any other data type, or when more than one
            ``ResultsContainer`` field is present.
        """
        # NOTE(review): developer-only remote-debugging hook with a
        # machine-specific path; only active when settings.CELERY_DEBUG is
        # set. Left untouched -- consider removing or making configurable.
        if settings.CELERY_DEBUG:
            import sys
            sys.path.append(
                '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
            )
            import pydevd
            pydevd.settrace('localhost',
                            port=6901,
                            stdoutToServer=True,
                            stderrToServer=True)

        rc = ResultsContainer(base_dir=exp.get_data_folder(),
                              base_filename="%s" % self.uuid)
        res_seq = self.res_seq

        def create_new_dim_rc(local_rc, axis_meta_block,
                              axis_meta_block_labels):
            # Give ``local_rc`` exactly one labelled axis and initialise it.
            local_rc.axis_list = [axis_meta_block]
            local_rc.labels_dict[axis_meta_block] = axis_meta_block_labels
            local_rc.init_ar()
            local_rc.update_label_index()

        # WARNING: We only support homogeneous results, so we only check first element
        # ``next(iter(...items()))`` works on both Python 2.6+ and Python 3.
        res_seq_field_name, data_type = next(iter(res_seq.fields.items()))
        if data_type == "ClassifierResult":
            single_rc_list = []
            for field_name in res_seq.fields:
                loc_list = []
                for idx, res_seq_cell in enumerate(res_seq.sequence):
                    fold_idx = idx % self.folds_num
                    if fold_idx == 0:
                        # First cell of a run: open a fresh per-run container.
                        # idx 0 always lands here, so ``rc_run`` is bound
                        # before its first use below.
                        rc_run = ResultsContainer("", "")
                        create_new_dim_rc(rc_run, self.base_name + "_folds", [
                            "fold_%s" % fold_num
                            for fold_num in range(self.folds_num)
                        ])
                        loc_list.append(rc_run)
                    rc_run.ar[fold_idx] = res_seq_cell[field_name]
                rc_single = ResultsContainer("", "")
                rc_single.add_dim_layer(loc_list, self.base_name,
                                        self.get_repeat_labels())
                single_rc_list.append(rc_single)
            # ``list(...)`` keeps the labels a real list on Python 3 as well.
            rc.add_dim_layer(single_rc_list, self.collector_spec.label,
                             list(res_seq.fields.keys()))

        elif data_type == "ResultsContainer":
            if len(res_seq.fields) > 1:
                raise Exception(
                    "Meta block only support single output of type ResultsContainer"
                )

            else:
                rc_list = []
                for cell in res_seq.sequence:
                    sub_rc = cell[res_seq_field_name]
                    sub_rc.load()
                    rc_list.append(sub_rc)

                rc.add_dim_layer(rc_list, self.base_name,
                                 self.get_fold_labels())

        elif data_type == "SequenceContainer":
            # TODO remove this check
            pass
        else:
            raise Exception("Meta blocks only support ClassifierResult "
                            "or ResultsContainer in the output collection. "
                            " Instead got: %s" % data_type)

        rc.store()
        # Drop the in-memory array after storing, before publishing.
        rc.ar = None
        self.set_out_var("results_container", rc)
예제 #13
0
class PatternSearch(GenericBlock):
    """Pattern search block.

    Consumes ``mRNA`` (required) and ``miRNA`` (optional) ``ExpressionSet``
    inputs and ``gene2gene`` (required) and ``miRNA2gene`` (optional)
    ``BinaryInteraction`` inputs, exposes the parameters ``genes_num``,
    ``d``, ``min_imp`` and ``metric``, and publishes a ``GeneSets`` output
    named ``patterns``. The computation is delegated to ``pattern_search``
    through ``wrapper_task``.
    """
    block_base_name = "PattSearch"
    name = "Pattern Search"
    block_group = GroupType.PATTERN_SEARCH

    is_block_supports_auto_execution = True

    # Parameter-validation transitions, extended below with the shared
    # execution actions.
    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _m_rna = InputBlockField(name="mRNA",
                             order_num=10,
                             required_data_type="ExpressionSet",
                             required=True)
    _mi_rna = InputBlockField(name="miRNA",
                              order_num=20,
                              required_data_type="ExpressionSet",
                              required=False)

    gene2gene = InputBlockField(name="gene2gene",
                                order_num=30,
                                required_data_type="BinaryInteraction",
                                required=True)
    miRNA2gene = InputBlockField(name="miRNA2gene",
                                 order_num=31,
                                 required_data_type="BinaryInteraction",
                                 required=False)

    genes_num = ParamField(name="genes_num",
                           title="Number of Genes",
                           order_num=10,
                           input_type=InputType.TEXT,
                           field_type=FieldType.INT,
                           init_val=100)

    # Forwarded to the task as ``radius``.
    d = ParamField(name="d",
                   order_num=70,
                   title="d",
                   input_type=InputType.TEXT,
                   field_type=FieldType.INT,
                   init_val=2)
    min_imp = ParamField(name="min_imp",
                         order_num=80,
                         title="Minimal improvement",
                         input_type=InputType.TEXT,
                         field_type=FieldType.FLOAT,
                         init_val=0.06)

    _metric = ParamField(
        "metric",
        title="Metric",
        order_num=40,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="mutual_information",
        options={
            "inline_select_provider":
            True,
            "select_options":
            [["mutual_information", "Mutual Information"],
             ['normed_mutual_information', "Normed Mutual Information"],
             ['square_error', "Square Error"], ['correlation', "Correlation"],
             ['t-test', "TTest"], ['wilcoxon', "Wilcoxon"]]
        })
    patterns = OutputBlockField(name="patterns", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(PatternSearch, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Read inputs and parameters, store the block and dispatch the task.

        :param exp: experiment object used for logging, to persist the
            block, and passed on to the task wrapper.
        """
        self.clean_errors()
        exp.log(self.uuid, "Execute called")

        self.celery_task = wrapper_task.s(
            pattern_search,
            exp,
            self,
            m_rna_es=self.get_input_var("mRNA"),
            mi_rna_es=self.get_input_var("miRNA"),
            gene2gene=self.get_input_var("gene2gene"),
            miRNA2gene=self.get_input_var("miRNA2gene"),
            radius=self.d,
            min_imp=self.min_imp,
            number_of_genes=self.genes_num,
            # ``metric`` is a ParamField, not an input port, so it is read
            # as an attribute like the other parameters rather than through
            # ``get_input_var``.
            metric=self.metric,
            base_filename="%s_comodule_sets" % self.uuid,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Log success, publish ``gs`` as ``patterns`` and store the block."""
        exp.log(self.uuid, "Success")
        self.set_out_var("patterns", gs)
        exp.store_block(self)
예제 #14
0
class NIMFASNMNMFBlock(GenericBlock):
    """Workflow block that submits ``nimfa_snmnmf_task`` for paired data.

    Inputs are an mRNA and a miRNA expression set plus gene-gene and
    miRNA-gene binary interactions.  The three task results are published
    through the ``W``, ``H1_miRNA`` and ``H2_genes`` outputs.  The
    DNA-methylation ports that used to exist are switched off; the notes
    below record where they were.
    """

    block_base_name = "NIMFA_SNMNMF"
    name = "NIMFA SNMNMF"

    is_abstract = False
    block_group = GroupType.SNMNMF

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, shared execution actions appended.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # --- input ports -------------------------------------------------------
    _m_rna = InputBlockField(
        name="mRNA",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True
    )
    _mi_rna = InputBlockField(
        name="miRNA",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True
    )
    # Disabled: optional "DNAmethyl" ExpressionSet input (order_num=30).
    _gene2gene = InputBlockField(
        name="Gene2Gene",
        order_num=40,
        required_data_type="BinaryInteraction",
        required=True
    )
    _mirna2gene = InputBlockField(
        name="miRNA2gene",
        order_num=50,
        required_data_type="BinaryInteraction",
        required=True
    )
    # Disabled: optional "Gene2DNAmethyl" BinaryInteraction input
    # (order_num=60).

    # --- user parameters (all forwarded to the task in one ``params`` dict) -
    l1 = ParamField(
        name="l1",
        order_num=70,
        title="l1",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1
    )
    l2 = ParamField(
        name="l2",
        order_num=80,
        title="l2",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1
    )
    g1 = ParamField(
        name="g1",
        order_num=90,
        title="g1",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1
    )
    g2 = ParamField(
        name="g2",
        order_num=100,
        title="g2",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1
    )
    rank = ParamField(
        name="rank",
        order_num=110,
        title="rank",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=50
    )

    # --- output ports ------------------------------------------------------
    w = OutputBlockField(name="W", provided_data_type="ExpressionSet")
    H1_miRNA = OutputBlockField(name="H1_miRNA",
                                provided_data_type="ExpressionSet")
    H2_genes = OutputBlockField(name="H2_genes",
                                provided_data_type="ExpressionSet")
    # Disabled: "H3_DNAmethyl", "H1_perf" and "H2_perf" ExpressionSet outputs.

    def __init__(self, *args, **kwargs):
        """Initialise the block with no Celery task attached yet."""
        super(NIMFASNMNMFBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Wrap ``nimfa_snmnmf_task`` in a task signature and dispatch it.

        :param exp: experiment owning this block; it is handed to the task
            and used to store the block before dispatch.
        """
        self.clean_errors()

        # Keyword arguments are evaluated top to bottom, i.e. the inputs are
        # fetched before the parameters are read.
        task_signature = wrapper_task.s(
            nimfa_snmnmf_task,
            exp,
            self,
            mRNA=self.get_input_var("mRNA"),
            miRNA=self.get_input_var("miRNA"),
            gene2gene=self.get_input_var("Gene2Gene"),
            miRNA2gene=self.get_input_var("miRNA2gene"),
            params={
                'l1': self.l1,
                'l2': self.l2,
                'g1': self.g1,
                'g2': self.g2,
                'rank': self.rank,
            },
            base_filename="%s_nimfa_snmnmf" % self.uuid
        )
        self.celery_task = task_signature

        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, W, H1, H2):
        """Publish the three task results on the outputs and store the block.

        :param exp: experiment owning this block.
        :param W: value for the "W" output.
        :param H1: value for the "H1_miRNA" output.
        :param H2: value for the "H2_genes" output.
        """
        published = (
            ("W", W),
            ("H1_miRNA", H1),
            ("H2_genes", H2),
        )
        for output_name, matrix in published:
            self.set_out_var(output_name, matrix)
        exp.store_block(self)
예제 #15
0
class ComoduleSetView(GenericBlock):
    """Visualisation block that shows a bound comodule set as a table.

    The block exposes three computed fields: ``table_js`` (columns and rows
    for the front-end table) and two URLs built with ``reverse`` that point
    at the ``export_json`` and ``export_csv`` fields of this block.
    """

    block_base_name = "CS_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Comodule Set View"

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
    ])

    input_comodule_set = InputBlockField(
        name="cs",
        order_num=10,
        required_data_type="ComoduleSet",
        required=True
    )

    # Fields flagged ``is_a_property`` are backed by the properties below.
    _table_for_js = BlockField(
        name="table_js",
        field_type=FieldType.RAW,
        is_a_property=True
    )
    _export_raw_results_url = BlockField(
        name="export_raw_results_url",
        field_type=FieldType.STR,
        is_a_property=True
    )
    _export_results_csv_url = BlockField(
        name="export_results_csv_url",
        field_type=FieldType.STR,
        is_a_property=True
    )
    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["comodule_set_view.html"]
    )

    @property
    def export_results_csv_url(self):
        """URL of the ``export_csv`` field of this block in "csv" format."""
        route_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_csv",
            "format": "csv"
        }
        return reverse("block_field_formatted", kwargs=route_kwargs)

    @property
    def export_raw_results_url(self):
        """URL of the ``export_json`` field of this block in "json" format."""
        route_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_json",
            "format": "json"
        }
        return reverse("block_field_formatted", kwargs=route_kwargs)

    @property
    def table_js(self):
        """Return the table description for the bound comodule set.

        :return: ``None`` when no (truthy) input is bound, otherwise a dict
            with a "columns" list (title / field code / visibility) and a
            "rows" list holding one dict per item of the loaded set.
        """
        comodule_set = self.get_input_var("cs")  # expected: ComoduleSet
        if not comodule_set:
            return None

        modules = comodule_set.load_set()  # expected: dict
        # The entry stored under key 0 decides how many value columns are
        # announced.
        if isinstance(modules[0], set):
            value_titles = ["values"]
        else:
            value_titles = ["values", "values"]
        headers = ["#"] + value_titles

        # Each title maps to a short code derived from its MD5 digest.
        code_by_title = {}
        for title in headers:
            code_by_title[title] = "_" + hashlib.md5(title).hexdigest()[:8]
        field_codes = [code_by_title[title] for title in headers]

        column_specs = []
        for title in headers:
            column_specs.append({
                "title": title,
                "field": code_by_title[title],
                "visible": True
            })

        # zip() stops at the shorter sequence, so every row carries exactly
        # the index and the value.
        row_specs = []
        for key, members in modules.iteritems():
            row_specs.append(dict(zip(field_codes, [key, members])))

        return {
            "columns": column_specs,
            "rows": row_specs
        }

    def export_json(self, exp, *args, **kwargs):
        """Return the loaded set as a list of ``(key, list(value))`` pairs."""
        modules = self.get_input_var("cs").load_set()
        exported = []
        for key, members in modules.iteritems():
            exported.append((key, list(members)))
        return exported

    def export_csv(self, exp, *args, **kwargs):
        """Return the loaded set rendered as CSV text, one item per row."""
        import csv
        import StringIO

        modules = self.get_input_var("cs").load_set()
        buf = StringIO.StringIO()
        writer = csv.writer(buf)
        for item in modules.items():
            writer.writerow(item)
        return buf.getvalue()
예제 #16
0
class GeneSetsView(GenericBlock):
    """Visualisation block that lists the content of bound gene sets.

    ``table_js`` turns the ``genes`` mapping of the bound object into a
    two-column key/value table; ``export_raw_results_url`` points at the
    ``export_json`` field of this block.
    """

    block_base_name = "GS_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Gene Sets view"

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready",
                      "input_bound"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
    ])

    _input_dictionary_set = InputBlockField(
        name="gs",
        order_num=10,
        required_data_type="GeneSets",
        required=True
    )

    # Fields flagged ``is_a_property`` are backed by the properties below.
    _table_for_js = BlockField(
        name="table_js",
        field_type=FieldType.RAW,
        is_a_property=True
    )

    _export_raw_results_url = BlockField(
        name="export_raw_results_url",
        field_type=FieldType.STR,
        is_a_property=True
    )

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["dictionary_set_view.html"]
    )

    @property
    def export_raw_results_url(self):
        """URL of the ``export_json`` field of this block in "json" format."""
        route_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_json",
            "format": "json"
        }
        return reverse("block_field_formatted", kwargs=route_kwargs)

    @property
    def table_js(self):
        """Return the key/value table description for the bound gene sets.

        :return: ``None`` when no (truthy) input is bound, otherwise a dict
            with "columns" (title / field code / visibility) and "rows"
            (one dict per entry, the value converted to a list).
        """
        gene_sets = self.get_input_var("gs")  # expected: GeneSets
        if not gene_sets:
            return None

        genes = gene_sets.get_gs(conv=False).genes
        headers = ['key', 'value']

        # Each title maps to a short code derived from its MD5 digest.
        code_by_title = {}
        for title in headers:
            code_by_title[title] = "_" + hashlib.md5(title).hexdigest()[:8]
        field_codes = [code_by_title[title] for title in headers]

        column_specs = []
        for title in headers:
            column_specs.append({
                "title": title,
                "field": code_by_title[title],
                "visible": True
            })

        records = [(key, list(members)) for key, members in genes.iteritems()]
        row_specs = [dict(zip(field_codes, record)) for record in records]

        return {
            "columns": column_specs,
            "rows": row_specs
        }

    def export_json(self, exp, *args, **kwargs):
        """Return the ``genes`` attribute of the bound gene sets.

        Unlike ``table_js`` this calls ``get_gs()`` with its default
        arguments.
        """
        gene_sets = self.get_input_var("gs")
        return gene_sets.get_gs().genes
예제 #17
0
class GeneSetAggCV(GenericBlock):
    """Aggregation block that submits ``agg_task_cv`` for a train/test pair.

    It takes a train and a test expression set plus gene sets, lets the user
    choose the aggregation method (mean, median or PCA) and publishes the two
    resulting expression sets.
    """

    block_group = GroupType.AGGREGATION
    block_base_name = "CV_GS_A"
    name = "CV Gene Sets Aggregation"
    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, shared execution actions appended.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_train_es = InputBlockField(
        name="train_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True
    )
    _input_test_es = InputBlockField(
        name="test_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True
    )

    _input_gs = InputBlockField(
        name="gs",
        order_num=30,
        required_data_type="GeneSets",
        required=True
    )

    # Each select option is a [stored value, display label] pair.
    agg_method = ParamField(
        "agg_method",
        title="Aggregate method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="mean",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mean", "Mean"],
                ["median", "Median"],
                ["pca", "PCA"],
            ],
        }
    )

    out_train_es = OutputBlockField(name="out_train_es",
                                    provided_data_type="ExpressionSet")
    out_test_es = OutputBlockField(name="out_test_es",
                                   provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no Celery task attached yet."""
        super(GeneSetAggCV, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Wrap ``agg_task_cv`` in a task signature and dispatch it.

        :param exp: experiment owning this block; it is handed to the task
            and used to store the block before dispatch.
        """
        self.clean_errors()

        # Keyword arguments are evaluated top to bottom, so the inputs are
        # fetched in the order train, test, gene sets.
        self.celery_task = wrapper_task.s(
            agg_task_cv,
            exp,
            self,
            train_es=self.get_input_var("train_es"),
            test_es=self.get_input_var("test_es"),
            gene_sets=self.get_input_var("gs"),
            method=self.agg_method,
            # The file name always carries "pca_cv", whatever method is
            # selected.
            base_filename="%s_%s_agg" % (self.uuid, "pca_cv")
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, out_train_es, out_test_es):
        """Publish both aggregated expression sets and store the block.

        :param exp: experiment owning this block.
        :param out_train_es: value for the "out_train_es" output.
        :param out_test_es: value for the "out_test_es" output.
        """
        results = (
            ("out_train_es", out_train_es),
            ("out_test_es", out_test_es),
        )
        for output_name, expression_set in results:
            self.set_out_var(output_name, expression_set)
        exp.store_block(self)
예제 #18
0
class MergeComoduleSets(GenericBlock):
    """Processing block that submits ``merge_comodules_task`` for two sets.

    Two comodule sets and a user-chosen name for each are forwarded to the
    task; the result is published on the "comodule_set" output.
    """

    block_base_name = "MERGE_COMODULE_SETS"
    name = "Merge Comodule Sets"

    is_abstract = False
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, shared execution actions appended.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # First comodule set and its name.
    _cs_1 = InputBlockField(
        name="cs_1",
        order_num=10,
        required_data_type="ComoduleSet",
        required=True
    )
    _cs_1_name = ParamField(
        name="cs_1_name",
        order_num=11,
        title="Comodule 1 name",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        init_val="genes"
    )

    # Second comodule set and its name.
    _cs_2 = InputBlockField(
        name="cs_2",
        order_num=20,
        required_data_type="ComoduleSet",
        required=True
    )
    _cs_2_name = ParamField(
        name="cs_2_name",
        order_num=21,
        title="Comodule 2 name",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        init_val="genes"
    )

    flt_es = OutputBlockField(
        name="comodule_set",
        provided_data_type="ComoduleSet"
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block with no Celery task attached yet."""
        super(MergeComoduleSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Wrap ``merge_comodules_task`` in a task signature and dispatch it.

        :param exp: experiment owning this block; it is handed to the task
            and used to store the block before dispatch.
        """
        self.clean_errors()

        # Keyword arguments are evaluated top to bottom: both inputs are
        # fetched before the name parameters are read.
        self.celery_task = wrapper_task.s(
            merge_comodules_task,
            exp,
            self,
            cs_1=self.get_input_var("cs_1"),
            cs_2=self.get_input_var("cs_2"),
            cs_1_name=self.cs_1_name,
            cs_2_name=self.cs_2_name,
            base_filename="%s_%s_thr" % (self.uuid, 'merge_cs')
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the merged set on "comodule_set" and store the block.

        :param exp: experiment owning this block.
        :param flt_es: value for the "comodule_set" output.
        """
        merged = flt_es
        self.set_out_var("comodule_set", merged)
        exp.store_block(self)
예제 #19
0
class PcaVisualize(GenericBlock):
    """Visualisation block that plots a two-component PCA of an input set.

    Once the parameters are valid the block immediately triggers
    ``compute_pca``, which projects the assay data and stores the points,
    grouped by the user-selected class column, in ``chart_series``.
    """

    block_base_name = "PCA_VISUALIZE"
    name = "2D PCA Plot"
    block_group = GroupType.VISUALIZE

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready",
                      "input_bound"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
        ActionRecord("compute_pca",
                     ["valid_params"],
                     "computing_pca",
                     user_title="Compute PCA"),
        ActionRecord("pca_done", ["computing_pca"], "done"),
        ActionRecord("reset_execution",
                     ["*", "done", "execution_error", "ready", "working"],
                     "ready",
                     user_title="Reset execution")
        # Disabled: ActionRecord("update", ["input_bound", "ready"], "ready")
    ])

    input_es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True
    )

    chart_series = BlockField(
        name="chart_series",
        field_type=FieldType.RAW,
        init_val=[]
    )
    chart_categories = BlockField(
        name="chart_categories",
        field_type=FieldType.SIMPLE_LIST,
        init_val=[]
    )

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["pca.html"]
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block.

        NOTE(review): unlike the sibling blocks this passes an extra leading
        "PCA visualise" argument to the parent constructor - confirm that the
        parent still expects it.
        """
        super(PcaVisualize, self).__init__("PCA visualise", *args, **kwargs)

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the parent handler, then start the PCA computation."""
        super(PcaVisualize, self).on_params_is_valid(exp, *args, **kwargs)

        self.do_action("compute_pca", exp)

    def compute_pca(self, exp, *args, **kwargs):
        """Project the input onto two components and fill ``chart_series``.

        :param exp: experiment owning this block; passed on to the
            "pca_done" action.
        """
        log.info("compute pca invoked")

        expression_set = self.get_input_var("es")  # expected: ExpressionSet
        assay_df = expression_set.get_assay_data_frame()
        pheno_df = expression_set.get_pheno_data_frame()
        class_column = expression_set.pheno_metadata['user_class_title']

        # The assay frame is transposed before fitting (presumably so that
        # each row is one sample - confirm against the data layout).
        samples = assay_df.as_matrix().transpose()

        pca = decomposition.PCA(n_components=2)
        pca.fit(samples)
        projected = pca.transform(samples).tolist()

        labels = [raw.strip() for raw in pheno_df[class_column].tolist()]

        # Group the projected points by their (stripped) class label.
        points_by_label = defaultdict(list)
        for point, label in zip(projected, labels):
            points_by_label[label].append(point)

        series = []
        for label, points in points_by_label.iteritems():
            series.append({
                "name": label,
                "data": points
            })
        self.chart_series = series

        self.do_action("pca_done", exp)

    def pca_done(self, exp, *args, **kwargs):
        """Log that the PCA computation has finished."""
        log.info("pca done")
예제 #20
0
class CrossValidation(UniformMetaBlock):
    """Meta block that submits ``generate_cv_folds`` for its bound inputs.

    Every dynamically added expression-set input gets a pair of inner
    outputs (train and test).  ``folds_num`` and ``repeats_num`` are passed
    to the fold-generation task and determine the number of fold labels.
    """

    block_base_name = "CROSS_VALID"
    name = "Cross validation K-fold"

    _cv_actions = ActionsList([
        ActionRecord("become_ready", ["valid_params"], "ready"),
    ])
    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["cv_info.html"]
    )

    # Extensible input: further expression-set ports can be added at runtime.
    _input_es_dyn = InputBlockField(
        name="es_inputs",
        required_data_type="ExpressionSet",
        required=True,
        multiply_extensible=True,
        order_num=-1
    )

    folds_num = ParamField(
        name="folds_num",
        title="Folds number",
        order_num=10,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=5
    )
    repeats_num = ParamField(
        name="repeats_num",
        title="Repeats number",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=1
    )

    def get_fold_labels(self):
        """Return "fold_1" ... "fold_N" with N = folds_num * repeats_num."""
        total_folds = self.folds_num * self.repeats_num
        labels = []
        for position in range(1, total_folds + 1):
            labels.append("fold_%s" % (position, ))
        return labels

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register train/test inner outputs for a newly added input port.

        :param exp: unused here.
        :param dyn_port: unused here.
        :param new_port: the added port (an ``InputBlockField``); its name
            and required data type are reused for both inner outputs.
        """
        port_name = new_port.name
        data_type = new_port.required_data_type

        train_output = InnerOutputField(name="%s_train_i" % port_name,
                                        provided_data_type=data_type)
        test_output = InnerOutputField(name="%s_test_i" % port_name,
                                       provided_data_type=data_type)

        # Remember which inner outputs belong to this input.
        self.inner_output_es_names_map[port_name] = (train_output.name,
                                                     test_output.name)
        self.register_inner_output_variables([train_output, test_output])

    def execute(self, exp, *args, **kwargs):
        """Wrap ``generate_cv_folds`` in a task signature and dispatch it.

        :param exp: experiment owning this block; it is handed to the task
            and used to store the block before dispatch.
        """
        self.clean_errors()
        self.inner_output_manager.reset()

        # Resolve every bound expression-set input by name.
        es_dict = {}
        for input_name in self.es_inputs:
            es_dict[input_name] = self.get_input_var(input_name)

        self.celery_task = wrapper_task.s(
            generate_cv_folds,
            exp,
            self,
            folds_num=self.folds_num,
            repeats_num=self.repeats_num,
            es_dict=es_dict,
            inner_output_es_names_map=self.inner_output_es_names_map,
            success_action="on_folds_generation_success"
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the parent handler, then trigger the "become_ready" action."""
        super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("become_ready", exp)

    def become_ready(self, *args, **kwargs):
        """Handler of the "become_ready" action; intentionally does nothing."""
        pass
예제 #21
0
class GenericRankingBlock(GenericBlock):
    """Abstract base for blocks that submit ``apply_ranking`` for an input.

    ``ranking_name`` and ``ranking_options`` are initialised empty here;
    ``collect_options`` resets the options right before each execution.  The
    result is a ``TableResult`` created in the constructor and published on
    the "result" output.
    """

    block_base_name = ""
    block_group = GroupType.PROCESSING
    is_abstract = True

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, shared execution actions appended.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True
    )

    # TODO: remove from generic ranking
    best = ParamField(
        name="best",
        title="Consider only best",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=None
    )

    _result = OutputBlockField(
        name="result",
        field_type=FieldType.STR,
        provided_data_type="TableResult",
        init_val=None
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block and pre-create its result table.

        The table lives in the data folder of the experiment looked up via
        ``self.exp_id`` and is published on the "result" output right away.
        """
        super(GenericRankingBlock, self).__init__(*args, **kwargs)
        self.ranking_name = None
        self.ranking_options = {}
        self.celery_task = None

        experiment = Experiment.get_exp_by_id(self.exp_id)
        data_folder = experiment.get_data_folder()
        self.result = TableResult(
            base_dir=data_folder,
            base_filename="%s_gt_result" % self.uuid
        )
        self.set_out_var("result", self.result)

    def collect_options(self):
        """Reset ``ranking_options`` to an empty dict."""
        self.ranking_options = {}

    def execute(self, exp, *args, **kwargs):
        """Wrap ``apply_ranking`` in a task signature and dispatch it.

        :param exp: experiment owning this block; it is handed to the task,
            used to store the block and to log the dispatch.
        """
        self.clean_errors()
        self.collect_options()

        self.celery_task = wrapper_task.s(
            apply_ranking,
            exp=exp,
            block=self,
            es=self.get_input_var("es"),
            ranking_name=self.ranking_name,
            result_table=self.result,
            options=self.ranking_options
        )
        exp.store_block(self)
        self.celery_task.apply_async()

        # The same message goes to the experiment log and the module logger.
        exp.log(self.uuid, "Sent ranking computation to queue")
        log.debug("Sent ranking computation to queue")

    def success(self, exp, result, *args, **kwargs):
        """Keep the task result, publish it on "result" and store the block.

        :param exp: experiment owning this block.
        :param result: result object returned by the ranking task.
        """
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
예제 #22
0
파일: ncf.py 프로젝트: strny007/miXGENE
class NCF(GenericBlock):
    """Classifier block that submits ``apply_ncf_classifier``.

    The block takes train/test expression sets for mRNA and miRNA together
    with gene-gene and miRNA-gene interactions.  ``collect_options`` gathers
    the classifier options from the inputs and parameters right before the
    task is dispatched; the task result is published on the "result" output.
    """

    block_group = GroupType.CLASSIFIER

    block_base_name = "NCF"
    name = "Network-Constrained Forest"

    classifier_name = "ncf"

    is_abstract = False

    is_block_supports_auto_execution = True

    # Block behavior
    _block_actions = ActionsList([])
    _block_actions.extend(save_params_actions_list)
    _block_actions.extend(execute_block_actions_list)

    # Interaction inputs; collect_options() passes both on as classifier
    # options.
    gene2gene = InputBlockField(name="gene2gene",
                                order_num=30,
                                required_data_type="BinaryInteraction",
                                required=True)
    miRNA2gene = InputBlockField(name="miRNA2gene",
                                 order_num=31,
                                 required_data_type="BinaryInteraction",
                                 required=True)

    # Expression-set inputs
    _m_train_es = InputBlockField(name="mRNA_train_es",
                                  order_num=10,
                                  required_data_type="ExpressionSet",
                                  required=True)
    _m_test_es = InputBlockField(name="mRNA_test_es",
                                 order_num=20,
                                 required_data_type="ExpressionSet",
                                 required=True)
    _mi_train_es = InputBlockField(name="miRNA_train_es",
                                   order_num=21,
                                   required_data_type="ExpressionSet",
                                   required=True)
    _mi_test_es = InputBlockField(name="miRNA_test_es",
                                  order_num=22,
                                  required_data_type="ExpressionSet",
                                  required=True)

    # Provided outputs
    _result = OutputBlockField(name="result",
                               field_type=FieldType.CUSTOM,
                               provided_data_type="ClassifierResult",
                               init_val=None)

    # User defined parameters.  Several numeric defaults are given as
    # strings; collect_options() converts them with int() where a type is
    # requested.
    n_estimators = ParamField(name="n_estimators",
                              title="The number of trees in the forest",
                              input_type=InputType.TEXT,
                              field_type=FieldType.INT,
                              init_val="1000",
                              order_num=41)

    walk_max_length = ParamField(name="walk_max_length",
                                 title="Walk max length",
                                 input_type=InputType.TEXT,
                                 field_type=FieldType.INT,
                                 init_val="10",
                                 order_num=50)

    # NOTE(review): this parameter is declared but collect_options() does
    # not forward it - confirm whether that is intended.
    criterion = ParamField(
        name="criterion",
        title="The function to measure the quality of a split",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        order_num=60,
        options={
            "inline_select_provider":
            True,
            "select_options": [["gini", "Gini impurity"],
                               ["entropy", "Information gain"]]
        })

    eps = ParamField(name="eps",
                     title="Eps",
                     input_type=InputType.TEXT,
                     field_type=FieldType.FLOAT,
                     init_val="0.01",
                     order_num=70)

    max_depth = ParamField(name="max_depth",
                           title="The maximum depth of the tree",
                           input_type=InputType.TEXT,
                           field_type=FieldType.INT,
                           init_val="2",
                           order_num=80)

    min_samples_split = ParamField(
        name="min_samples_split",
        title="The minimum number of samples to split an internal node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val="2",
        order_num=90,
    )

    min_samples_leaf = ParamField(
        name="min_samples_leaf",
        title="The minimum number of samples to be at a leaf node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val="2",
        order_num=100)

    bootstrap = ParamField(name="bootstrap",
                           title="bootstrap",
                           input_type=InputType.CHECKBOX,
                           field_type=FieldType.BOOLEAN,
                           required=False,
                           order_num=110)

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task and empty option dicts."""
        super(NCF, self).__init__(*args, **kwargs)

        self.celery_task = None
        self.classifier_options = {}
        self.fit_options = {}

    def execute(self, exp, *args, **kwargs):
        """Wrap ``apply_ncf_classifier`` in a task signature and dispatch it.

        The "result" output is cleared and the classifier options are
        collected before the signature is built.

        :param exp: experiment owning this block; it is handed to the task,
            supplies the data folder and is used to store the block.
        """
        self.set_out_var("result", None)
        self.collect_options()

        # Keyword arguments are evaluated top to bottom, so the four inputs
        # are fetched in the order mRNA train/test, miRNA train/test.
        self.celery_task = wrapper_task.s(
            apply_ncf_classifier,
            exp=exp,
            block=self,
            mRNA_train_es=self.get_input_var("mRNA_train_es"),
            mRNA_test_es=self.get_input_var("mRNA_test_es"),
            miRNA_train_es=self.get_input_var("miRNA_train_es"),
            miRNA_test_es=self.get_input_var("miRNA_test_es"),
            classifier_name=self.classifier_name,
            classifier_options=self.classifier_options,
            fit_options=self.fit_options,
            base_folder=exp.get_data_folder(),
            base_filename="%s_%s" % (self.uuid, self.classifier_name),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Publish the task result on the "result" output and store the block.

        :param exp: experiment owning this block.
        :param result: result object returned by the classifier task.
        """
        self.set_out_var("result", result)
        exp.store_block(self)

    def reset_execution(self, exp, *args, **kwargs):
        """Clear errors and the "result" output, then store the block."""
        self.clean_errors()
        # self.get_scope().remove_temp_vars()
        self.set_out_var("result", None)
        exp.store_block(self)

    def get_option_safe(self, name, target_type=None):
        """Return the value of attribute ``name``, optionally converted.

        :param name: attribute (parameter) name to read from the block.
        :param target_type: optional callable such as ``int`` used to
            convert the raw value.
        :return: the raw or converted value; ``None`` when the attribute is
            missing, when its value is falsy (``None``, ``""``, ``0``,
            ``False``) or when the conversion raises ``TypeError`` or
            ``ValueError``.
        """
        raw = getattr(self, name, None)
        if not raw:
            return None
        if not target_type:
            return raw
        try:
            return target_type(raw)
        except (TypeError, ValueError):
            # A value that cannot be converted is treated as "not set".
            # Only conversion failures are absorbed; anything else (for
            # example KeyboardInterrupt) propagates.
            return None

    def collect_option_safe(self, name, target_type=None, target_name=None):
        """Copy one parameter into ``classifier_options`` when it is set.

        :param name: attribute (parameter) name to read.
        :param target_type: optional conversion callable, see
            ``get_option_safe``.
        :param target_name: key to store the value under; defaults to
            ``name``.
        :return: the collected value, or ``None`` when nothing was stored.
        """
        value = self.get_option_safe(name, target_type)
        if value:
            self.classifier_options[target_name or name] = value
        return value

    def collect_options(self):
        """Rebuild ``classifier_options`` from the inputs and parameters."""
        # Start from scratch so that a parameter the user has cleared since
        # the previous execution does not linger in the options.
        self.classifier_options = {}

        self.classifier_options["gene2gene"] = self.get_input_var("gene2gene")
        self.classifier_options["miRNA2gene"] = self.get_input_var(
            "miRNA2gene")
        # range() excludes its upper bound, so the lengths run from 1 to
        # walk_max_length - 1.  NOTE(review): confirm that the maximum itself
        # is meant to be excluded.
        self.classifier_options['walk_lengths'] = range(
            1, int(self.walk_max_length))
        self.collect_option_safe("eps")
        self.collect_option_safe("n_estimators", int)
        # self.collect_option_safe("max_features")
        self.collect_option_safe("max_depth", int)
        self.collect_option_safe("min_samples_leaf", int)
        self.collect_option_safe("min_samples_split", int)
        self.classifier_options["bootstrap"] = self.bootstrap
예제 #23
0
class EnrichmentVisualize(GenericBlock):
    """Visualisation block that renders a "DictionarySet" input as a table.

    Exposes the table as the `table_js` property and offers JSON / CSV
    export of the underlying dictionary.
    """
    block_base_name = "EV_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Enrichment Visualize"

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
    ])

    _input_dictionary_set = InputBlockField(
        name="ds",
        order_num=10,
        required_data_type="DictionarySet",
        required=True)

    _table_for_js = BlockField(
        name="table_js",
        field_type=FieldType.RAW,
        is_a_property=True)

    _export_raw_results_url = BlockField(
        name="export_raw_results_url",
        field_type=FieldType.STR,
        is_a_property=True)

    _export_results_csv_url = BlockField(
        name="export_results_csv_url",
        field_type=FieldType.STR,
        is_a_property=True)

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["enrichment_view.html"])

    def map_to_symbols(self, gene_set):
        """Return the `name` of every GeneIdentifier whose refseq is in `gene_set`."""
        matches = GeneIdentifier.objects.filter(refseq__refseq__in=gene_set)
        return [identifier.name for identifier in matches]

    @property
    def export_results_csv_url(self):
        """URL of the "block_field_formatted" route for the CSV export."""
        route_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_csv",
            "format": "csv",
        }
        return reverse("block_field_formatted", kwargs=route_kwargs)

    @property
    def export_raw_results_url(self):
        """URL of the "block_field_formatted" route for the JSON export."""
        route_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_json",
            "format": "json",
        }
        return reverse("block_field_formatted", kwargs=route_kwargs)

    @property
    def table_js(self):
        """Build the column / row description of the table.

        Returns None while the "ds" input is not available. Otherwise a
        dict with "columns" (title, generated field code, visibility) and
        "rows" (one dict per dictionary entry keyed by the field codes).
        """
        dictionary_set = self.get_input_var("ds")  # expected: DictionarySet
        if not dictionary_set:
            return None

        table = dictionary_set.load_dict()
        titles = ['comodule', '(term, p-val)', 'genes']

        # Column titles are turned into short md5-derived field codes.
        code_by_title = {}
        for title in titles:
            code_by_title[title] = "_" + hashlib.md5(title).hexdigest()[:8]
        field_codes = [code_by_title[title] for title in titles]

        columns = [
            {"title": title, "field": code_by_title[title], "visible": True}
            for title in titles
        ]

        # Each entry yields (key, value[1], symbols mapped from value[0]).
        records = [
            (key, value[1], set(self.map_to_symbols(value[0])))
            for key, value in table.iteritems()
        ]
        rows = [dict(zip(field_codes, record)) for record in records]

        return {"columns": columns, "rows": rows}

    def export_json(self, exp, *args, **kwargs):
        """Return the dictionary loaded from the "ds" input."""
        return self.get_input_var("ds").load_dict()

    def export_csv(self, exp, *args, **kwargs):
        """Return the "ds" dictionary as CSV text, one (key, value) row per entry."""
        import csv
        import StringIO
        entries = self.get_input_var("ds").load_dict()
        buf = StringIO.StringIO()
        csv.writer(buf).writerows(entries.items())
        buf.seek(0)
        return buf.read()
예제 #24
0
class GeneSetAgg(GenericBlock):
    """Processing block that aggregates an ExpressionSet over GeneSets.

    The work is delegated to `do_gs_agg`, scheduled through
    `wrapper_task`; the outcome is published as the "agg_es" output.
    """
    block_base_name = "GENE_SET_AGG"
    name = "Gene sets aggregation"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True)
    _gs = InputBlockField(
        name="gs",
        order_num=20,
        required_data_type="GeneSets",
        required=True)

    # NOTE(review): the option value "media" looks like a typo for
    # "median"; it is forwarded to do_gs_agg unchanged - confirm which
    # spelling that function expects before touching it.
    agg_method = ParamField(
        "agg_method",
        title="Aggregate method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="mean",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mean", "Mean"],
                ["media", "Median"],
            ],
        })

    agg_es = OutputBlockField(
        name="agg_es",
        provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no scheduled task."""
        super(GeneSetAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear errors, build the aggregation task, store the block and launch."""
        self.clean_errors()
        expression_set = self.get_input_var("es")
        gene_sets = self.get_input_var("gs")

        base_filename = "%s_gs_agg" % (self.uuid, )

        task_args = (do_gs_agg, exp, self, expression_set, gene_sets,
                     self.agg_method, base_filename)
        self.celery_task = wrapper_task.s(*task_args)

        # The block is stored before the task is dispatched.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        """Publish `agg_es` as the "agg_es" output and persist the block."""
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
예제 #25
0
class MultiFeature(UniformMetaBlock):
    """Meta block that builds folds from a user-chosen list of features.

    Accepts a dynamically extensible set of ExpressionSet inputs
    ("es_inputs") and forwards them, with the selected features, to
    `prepare_folds`.
    """
    block_base_name = "MULTI_FEATURE"
    name = "Multi Feature Validation"

    _mf_block_actions = ActionsList([
        ActionRecord(
            "on_feature_selection_updated",
            ["valid_params", "ready", "done"],
            "ready"),
    ])

    _input_es_dyn = InputBlockField(
        name="es_inputs",
        order_num=-10,
        required_data_type="ExpressionSet",
        required=True,
        multiply_extensible=True)

    _is_sub_pages_visible = BlockField(
        "is_sub_pages_visible",
        FieldType.RAW,
        init_val=False,
        is_a_property=True)

    pages = BlockField(
        "pages",
        FieldType.RAW,
        init_val={
            "select_feature": {
                "title": "Select features to examine",
                "resource": "select_feature",
                "widget": "widgets/select_feature.html",
            },
        })

    def __init__(self, *args, **kwargs):
        """Initialise the block with an empty feature selection."""
        super(MultiFeature, self).__init__(*args, **kwargs)
        self.features = []

    @property
    def is_sub_pages_visible(self):
        """True when the block state is 'valid_params', 'done' or 'ready'."""
        return self.state in ('valid_params', 'done', 'ready')

    def get_fold_labels(self):
        """Return the selected features; they double as fold labels."""
        return self.features

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register an inner output mirroring a newly added dynamic input.

            @type new_port: InputBlockField
        """
        inner_name = "%s_i" % new_port.name
        inner_output = InnerOutputField(
            name=inner_name,
            provided_data_type=new_port.required_data_type)
        self.inner_output_es_names_map[new_port.name] = inner_output.name
        self.register_inner_output_variables([inner_output])

    def execute(self, exp, *args, **kwargs):
        """Reset inner outputs, schedule `prepare_folds`, store and launch."""
        self.inner_output_manager.reset()

        es_dict = {}
        for port_name in self.es_inputs:
            es_dict[port_name] = self.get_input_var(port_name)

        task_kwargs = {
            "features": self.features,
            "es_dict": es_dict,
            "inner_output_es_names_map": self.inner_output_es_names_map,
            "success_action": "on_folds_generation_success",
        }
        self.celery_task = wrapper_task.s(
            prepare_folds, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Describe the phenotype of the first available input for the UI.

        The first input whose variable is not None is handed to
        `prepare_phenotype_for_js_from_es` (None is passed when no input is
        available); the current selection is added under "features".
        """
        candidates = (self.get_input_var(port_name)
                      for port_name in self.es_inputs)
        es = next((item for item in candidates if item is not None), None)

        res = prepare_phenotype_for_js_from_es(es)
        res["features"] = self.features
        return res

    def update_feature_selection(self, exp, request, *args, **kwargs):
        """Replace the selection with the "features" entry of the JSON body.

        A non-empty selection triggers "on_feature_selection_updated".
        """
        self.features = json.loads(request.body)["features"]
        if self.features:
            self.do_action("on_feature_selection_updated", exp)

    def on_feature_selection_updated(self, *args, **kwargs):
        """Handler for the state transition; nothing extra to do."""
        pass
예제 #26
0
class PatternView(GenericBlock):
    """Visualisation block drawing gene patterns as small graphs.

    Takes "patterns" (GeneSets), "edges" (Edges) and "diff_expr"
    (DiffExpr) inputs and exposes a node / edge description through the
    `graph_js` property.
    """
    block_base_name = "PA_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Patterns Visualizer"

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready", "input_bound"], "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params")
    ])

    _input_patterns = InputBlockField(name="patterns", order_num=10,
                                      required_data_type="GeneSets", required=True)

    _input_edges = InputBlockField(name="edges", order_num=20,
                                   required_data_type="Edges", required=True)

    _diff_expr = InputBlockField(name="diff_expr", order_num=30,
                                 required_data_type="DiffExpr", required=True)

    _graph_for_js = BlockField(name="graph_js", field_type=FieldType.RAW, is_a_property=True)

    _edges_for_js = BlockField(name="edges", field_type=FieldType.RAW, is_a_property=False)

    _export_raw_results_url = BlockField(name="export_raw_results_url",
                                         field_type=FieldType.STR, is_a_property=True)

    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[
        "pattern_view.html"
    ])

    @property
    def export_raw_results_url(self):
        """URL of the "block_field_formatted" route for the JSON export."""
        return reverse("block_field_formatted", kwargs={
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_json",
            "format": "json"
        })

    @property
    def graph_js(self):
        """Build the node / edge description of all patterns.

        Returns None while any of the three inputs is unavailable.
        Otherwise returns ``{"nodes": [...], "edges": [...]}``:

        * every gene of pattern ``j`` becomes a node placed on a unit
          circle whose centre sits on a square grid cell chosen by ``j``;
          colour and size are derived from ``diff_expr[gene]``;
        * every ``(i, j)`` pair of the k-th edge list becomes an edge
          between nodes ``"k_i"`` and ``"k_j"``.
        """
        diff_expr = self.get_input_var("diff_expr")
        edges = self.get_input_var("edges")
        cs = self.get_input_var("patterns")
        if not (cs and edges and diff_expr):
            return None

        import math

        pattern_set = cs.get_gs(conv=False).genes
        edges = edges.load_edges()
        diff_expr = diff_expr.load_expr()

        # Patterns are laid out on a square grid; the side is loop-invariant.
        grid_side = int(math.floor(math.sqrt(len(pattern_set))))

        nodes = []
        for j, com in enumerate(pattern_set):
            column = j % grid_side
            # Floor division keeps the row index integral even when true
            # division is in effect (Python 3 / `from __future__ import
            # division`); a plain `/` would smear rows across the grid.
            row = j // grid_side
            for i, gene in enumerate(com):
                angle = 2 * i * math.pi / len(com)
                level = diff_expr[gene]
                shade = ((level + 1) * 128) - 1
                nodes.append({
                    "id": "%s_%s" % (j, gene),
                    "label": gene,
                    "x": math.cos(angle) + grid_side * column,
                    "y": math.sin(angle) + grid_side * row,
                    "color": "rgb(%s, %s, %s)" % (
                        abs(int(math.floor(shade))),
                        abs(int(math.floor(255 - shade))),
                        0),
                    "size": 2 + abs(level) * 2,
                })

        graph_edges_js = [
            {"id": "%s_%s_%s" % (k, i, j),
             "source": "%s_%s" % (k, i),
             "target": "%s_%s" % (k, j)}
            for k, graph_edges in enumerate(edges) for i, j in graph_edges
        ]

        return {"nodes": nodes, "edges": graph_edges_js}

    def export_json(self, exp, *args, **kwargs):
        """Return the set loaded from the "es" input.

        NOTE(review): this block declares only the inputs "patterns",
        "edges" and "diff_expr"; no "es" input is visible here, so this
        lookup presumably returns None and fails - confirm which input
        was meant.
        """
        ds = self.get_input_var("es")
        dic = ds.load_set()
        return dic

    def process_upload(self, exp, *args, **kwargs):
        """No-op in this block."""
        pass

    def success(self, exp, *args, **kwargs):
        """No-op: this block produces no outputs to store."""
        pass
예제 #27
0
class FeatureSelectionByCut(GenericBlock):
    """Filter block that cuts an ExpressionSet by a ranking threshold.

    The ranking comes from a "TableResult" input; the actual selection is
    performed by `feature_selection_by_cut`, scheduled via `wrapper_task`.
    """
    block_base_name = "FS_BY_CUT"
    block_group = GroupType.FILTER
    name = "Feature Selection by Ranking"

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True)

    _rank_table = InputBlockField(
        name="rank_table",
        order_num=20,
        required_data_type="TableResult",
        required=True)

    _cut_property_options = BlockField(
        name="cut_property_options",
        field_type=FieldType.RAW,
        is_a_property=True)

    # Entered as free text; the "cut_property_options" provider is
    # declared above but not wired to this field.
    cut_property = ParamField(
        name="cut_property",
        title="Ranking property to use",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        order_num=10,
    )
    threshold = ParamField(
        name="threshold",
        title="Threshold for cut",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
    )

    _cut_direction_options = BlockField(
        name="cut_direction_options",
        field_type=FieldType.RAW)
    cut_direction_options = ["<", "<=", ">=", ">"]
    cut_direction = ParamField(
        name="cut_direction",
        title="Direction of cut",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        select_provider="cut_direction_options",
        order_num=30,
        options={
            "inline_select_provider": True,
            "select_options": [[op, op] for op in ["<", "<=", ">=", ">"]],
        })

    es = OutputBlockField(name="es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no scheduled task."""
        super(FeatureSelectionByCut, self).__init__(*args, **kwargs)
        self.celery_task = None

    @property
    def cut_property_options(self):
        """List the rank table headers as ``{"pk": h, "str": h}`` dicts.

        Returns None when the "rank_table" input is unavailable or has no
        `headers` attribute.
        """
        rank_table = self.get_input_var("rank_table")
        if not rank_table or not hasattr(rank_table, "headers"):
            return None
        return [{"pk": header, "str": header}
                for header in rank_table.headers]

    def execute(self, exp, *args, **kwargs):
        """Clear errors, build the selection task, store the block and launch."""
        self.clean_errors()
        task_kwargs = {
            "exp": exp,
            "block": self,
            "src_es": self.get_input_var("es"),
            "rank_table": self.get_input_var("rank_table"),
            "cut_property": self.cut_property,
            "threshold": self.threshold,
            "cut_direction": self.cut_direction,
            "base_filename": "%s_feature_selection" % self.uuid,
        }
        self.celery_task = wrapper_task.s(
            feature_selection_by_cut, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Publish `es` as the "es" output and persist the block."""
        self.set_out_var("es", es)
        exp.store_block(self)
예제 #28
0
class TableResultView(GenericBlock):
    """Visualisation block showing a "TableResult" input as a table.

    Offers the table to the UI through `table_js` and provides CSV / JSON
    export endpoints.
    """
    block_base_name = "TR_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Table Result view"

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready",
                      "input_bound"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
    ])

    input_table_result = InputBlockField(
        name="tr",
        order_num=10,
        required_data_type="TableResult",
        required=True)

    _table_for_js = BlockField(
        name="table_js",
        field_type=FieldType.RAW,
        is_a_property=True)

    _export_table_url = BlockField(
        name="export_table_url",
        field_type=FieldType.STR,
        is_a_property=True)
    _export_raw_results_url = BlockField(
        name="export_raw_results_url",
        field_type=FieldType.STR,
        is_a_property=True)

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["table_result_view.html"])

    @property
    def export_table_url(self):
        """URL of the "block_field_formatted" route for the CSV export."""
        route_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_table",
            "format": "csv",
        }
        return reverse("block_field_formatted", kwargs=route_kwargs)

    @property
    def export_raw_results_url(self):
        """URL of the "block_field_formatted" route for the JSON export."""
        route_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_rc",
            "format": "json",
        }
        return reverse("block_field_formatted", kwargs=route_kwargs)

    @property
    def table_js(self):
        """Build the column / row description of the input table.

        Returns None while the "tr" input is unavailable. Otherwise a dict
        with "columns" (title, generated field code, visibility) and "rows"
        (one dict per table record keyed by the field codes). The leading
        "#" column pairs with the first element of each record.
        """
        table_result = self.get_input_var("tr")  # expected: TableResult
        if not table_result:
            return None

        table = table_result.get_table()
        titles = ["#"] + table.columns.tolist()

        # Column titles are turned into short md5-derived field codes.
        code_by_title = {}
        for title in titles:
            code_by_title[title] = "_" + hashlib.md5(title).hexdigest()[:8]
        field_codes = [code_by_title[title] for title in titles]

        columns = [
            {"title": title, "field": code_by_title[title], "visible": True}
            for title in titles
        ]
        rows = [
            dict(zip(field_codes, record))
            for record in table.to_records().tolist()
        ]
        return {"columns": columns, "rows": rows}

    def export_rc(self, exp, *args, **kwargs):
        """Return the same structure as `table_js`."""
        return self.table_js

    def export_table(self, exp, *args, **kwargs):
        """Return the input table as CSV text with cells formatted "%1.4f"."""

        def four_decimals(x):
            return "%1.4f" % x

        table = self.get_input_var("tr").get_table()  # expected: TableResult
        out = StringIO.StringIO()
        # Cells are formatted up front through applymap; the original
        # author noted that float_format alone did not work in pandas.
        formatted = table.applymap(four_decimals)
        formatted.to_csv(out, float_format=four_decimals)

        out.seek(0)
        return out.read()
예제 #29
0
파일: filter.py 프로젝트: strny007/miXGENE
class FilterBlock(GenericBlock):
    """Filter block applying a low-value or variance filter to an ExpressionSet.

    The filtering itself is done by `filter_task`, scheduled through
    `wrapper_task`; the outcome is published as the "flt_es" output.
    """
    block_base_name = "FILTER"
    name = "Var/Val Filter"

    is_abstract = False
    block_group = GroupType.FILTER

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True)

    filter_method = ParamField(
        "filter_method",
        title="Filter method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="LOW_VAL",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["LOW_VAL", "Low Val Filter"],
                ["VAR", "Var Filter"],
            ],
        })
    q = ParamField(
        name="q",
        title="Threshold",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=30.0)

    flt_es = OutputBlockField(
        name="flt_es",
        provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no scheduled task."""
        super(FilterBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear errors, build the filter task, store the block and launch."""
        self.clean_errors()
        es = self.get_input_var("es")

        task_kwargs = {
            "filter_type": self.filter_method,
            "q": self.q,
            "es": es,
            "base_filename": "%s_%s_flt" % (self.uuid, self.filter_method),
        }
        self.celery_task = wrapper_task.s(
            filter_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish `flt_es` as the "flt_es" output and persist the block."""
        self.set_out_var("flt_es", flt_es)
        exp.store_block(self)
예제 #30
0
class GenericClassifier(GenericBlock):
    """Abstract base for classifier blocks.

    Subclasses set `classifier_name` and implement `collect_options`;
    `execute` then schedules `apply_classifier` on the "train_es" /
    "test_es" inputs and the outcome is published as the "result" output.
    """
    block_group = GroupType.CLASSIFIER
    is_abstract = True

    is_block_supports_auto_execution = True
    classifier_name = ""
    # Block behavior
    _block_actions = ActionsList([])
    _block_actions.extend(save_params_actions_list)
    _block_actions.extend(execute_block_actions_list)

    # User defined parameters
    # Input ports definition
    _train_es = InputBlockField(name="train_es", order_num=10,
                                required_data_type="ExpressionSet",
                                required=True)
    _test_es = InputBlockField(name="test_es", order_num=20,
                               required_data_type="ExpressionSet",
                               required=True)

    # Provided outputs
    _result = OutputBlockField(name="result", field_type=FieldType.CUSTOM,
                               provided_data_type="ClassifierResult", init_val=None)

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task and empty option dicts."""
        super(GenericClassifier, self).__init__(*args, **kwargs)

        self.celery_task = None
        self.classifier_options = {}
        self.fit_options = {}

    @abstractmethod
    def collect_options(self):
        """
            Should populate `self.classifier_options` and `self.fit_options`
            from block parameters.
        """
        pass

    def get_option_safe(self, name, target_type=None):
        """Read the attribute `name`, optionally converting it.

        :param name: attribute name to look up on this block.
        :param target_type: optional callable applied to the raw value
            (e.g. `int`); when omitted the raw value is returned as is.
        :return: the (converted) value, or None when the attribute is
            missing, falsy (empty string, 0, None, ...) or when the
            conversion fails.
        """
        if not hasattr(self, name):
            return None
        raw = getattr(self, name)
        if not raw:
            # Falsy values are treated as "option not set".
            return None
        if not target_type:
            return raw
        try:
            return target_type(raw)
        except Exception:
            # Best-effort conversion: an unparsable value means "not set".
            # `Exception` (instead of a bare `except`) lets
            # KeyboardInterrupt / SystemExit propagate.
            return None

    def collect_option_safe(self, name, target_type=None, target_name=None):
        """Copy one option into `self.classifier_options` when it is set.

        A truthy value from `get_option_safe(name, target_type)` is stored
        under `target_name` when given, otherwise under `name`.

        :return: whatever `get_option_safe` returned.
        """
        value = self.get_option_safe(name, target_type)
        if value:
            if target_name:
                self.classifier_options[target_name] = value
            else:
                self.classifier_options[name] = value
        return value

    def execute(self, exp,  *args, **kwargs):
        """Reset the output, collect options, schedule the classifier task.

        The block is stored through `exp.store_block` before the task is
        dispatched with `apply_async`.
        """
        self.set_out_var("result", None)
        self.collect_options()

        train_es = self.get_input_var("train_es")
        test_es = self.get_input_var("test_es")

        self.celery_task = wrapper_task.s(
            apply_classifier,
            exp=exp, block=self,

            train_es=train_es, test_es=test_es,

            classifier_name=self.classifier_name,
            classifier_options=self.classifier_options,
            fit_options=self.fit_options,

            base_folder=exp.get_data_folder(),
            base_filename="%s_%s" % (self.uuid, self.classifier_name),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Publish `result` as the "result" output and persist the block."""
        # We store obtained result as an output variable
        self.set_out_var("result", result)
        exp.store_block(self)

    def reset_execution(self, exp, *args, **kwargs):
        """Clear errors, blank the "result" output and persist the block."""
        self.clean_errors()
        self.set_out_var("result", None)
        exp.store_block(self)