Пример #1
0
    def __init__(self, location=None, dirname=None):
        """Resolve the configuration directory paths.

        location defaults to the user's home directory; dirname defaults
        to a hidden directory named after the application (CONFIG.NAME).
        """
        # TODO: type check - location (str, path-like object)
        # TODO: type check - dirname  (str, path-like object)
        home = os.path.expanduser('~')

        self.location = os.path.abspath(assign_if_none(location, home))
        default_dirname = '.{dirname}'.format(dirname=CONFIG.NAME)
        self.dirname = assign_if_none(dirname, default_dirname)
        self.dirpath = os.path.join(self.location, self.dirname)
Пример #2
0
    def __init__(self, schema=None):
        """Initialize the node from *schema* (an empty mapping when None)."""
        # check_mapping(schema)
        if schema is None:
            schema = {}
        self.schema = schema
        self.children = []

        self.update(self.schema)
Пример #3
0
    def __init__(self, schema=None):
        """Initialize the node from *schema* (an empty mapping when None)."""
        # check_mapping(schema)
        # PEP 8: no spaces around '=' in defaults, no column-aligned '='.
        self.schema = assign_if_none(schema, {})

        self.children = []

        self.update(self.schema)
Пример #4
0
    def __init__(self, email, name=None):
        """Store client identity; *name* falls back to Client.NAME.

        Also primes ``self.databases`` with a fresh info lookup.
        """
        # TODO: type check and validate - email (str), valid email
        # TODO: type check - name (str)
        # TODO: Maybe try saving base parameters as environment variables?

        # PEP 8: drop column-aligned assignments and spaced keyword '='.
        self.email = email
        self.name = assign_if_none(name, Client.NAME)

        # TODO: Should we cache databases?
        self.databases = self.info(refresh_cache=True)
Пример #5
0
    def __init__(self, email, name=None):
        """Store client identity and prime the database listing."""
        # TODO: type check and validate - email (str), valid email
        # TODO: type check - name (str)
        # TODO: Maybe try saving base parameters as environment variables?

        self.email = email
        if name is None:
            name = Client.NAME
        self.name = name

        # TODO: Should we cache databases?
        self.databases = self.info(refresh_cache=True)
Пример #6
0
    def __init__(self, status=None, code=200):
        """Build a response envelope with a fresh UUID and a base schema.

        status defaults to Response.Status.SUCCESS; *code* is the HTTP-style
        status code carried alongside the payload.
        """
        # PEP 8: no spaces around '=' in defaults, no column-aligned '='.
        self.version = CONFIG.VERSION
        self.id = get_rand_uuid_str()
        self.status = assign_if_none(status, Response.Status.SUCCESS)
        self.code = code

        self.schema = addict.Dict()

        # Schema gets its own UUID, distinct from self.id.
        self.schema.id = get_rand_uuid_str()
        self.schema.version = self.version
        self.schema.status = self.status
Пример #7
0
    def request(self, method, url, parameters=None, *args, **kwargs):
        """Issue an HTTP request, merging *parameters* over the base params.

        Returns the sanitized response payload; raises requests.HTTPError
        (via raise_for_status) for non-OK responses.
        """
        parameters = assign_if_none(parameters, dict())
        # BUG FIX: copy the base params — the original aliased
        # self.baseparams, so update() permanently polluted the shared
        # dict with per-request parameters.
        params = dict(self.baseparams)
        params.update(parameters)

        response = requests.request(method, url, params=params, *args, **kwargs)
        if response.ok:
            data = sanitize_response(response, params['retmode'])
        else:
            response.raise_for_status()

        return data
Пример #8
0
    def __init__(self, status=None, code=200, data=None):
        """Build a response envelope and attach *data* as the payload.

        status defaults to Response.Status.SUCCESS; *data* defaults to a
        fresh empty dict per call (the original used a mutable default
        argument ``data = { }``, shared across all calls).
        """
        self.version = CONFIG.VERSION
        self.id = get_rand_uuid_str()
        self.status = assign_if_none(status, Response.Status.SUCCESS)
        self.code = code

        self.schema = addict.Dict()

        self.schema.id = get_rand_uuid_str()
        self.schema.version = self.version
        self.schema.status = self.status

        self.set_data(assign_if_none(data, {}))
Пример #9
0
    def request(self, method, url, parameters=None, *args, **kwargs):
        """Issue an HTTP request, merging *parameters* over the base params.

        Returns the sanitized response payload; raises requests.HTTPError
        (via raise_for_status) for non-OK responses.
        """
        parameters = assign_if_none(parameters, dict())
        # BUG FIX: copy the base params — the original aliased
        # self.baseparams, so update() permanently polluted the shared
        # dict with per-request parameters.
        params = dict(self.baseparams)
        params.update(parameters)

        response = requests.request(method,
                                    url,
                                    params=params,
                                    *args,
                                    **kwargs)
        if response.ok:
            data = sanitize_response(response, params['retmode'])
        else:
            response.raise_for_status()

        return data
Пример #10
0
def resource(
        path=None,
        level=None,  # provide an exhaustive search
        filter_=None):
    """Discover resources under *path* and return ``(json, http_code)``.

    path    : search root; defaults to CONFIG.App.STARTDIR.
    level   : search depth; None means exhaustive.
    filter_ : extensions to keep; defaults to ['CDATA', 'CEL'].
              Resolved per call to avoid the mutable-default-argument
              pitfall of the original signature.
    """
    response = Response()

    startdir = assign_if_none(path, CONFIG.App.STARTDIR)
    filter_ = assign_if_none(filter_, ['CDATA', 'CEL'])

    tree = discover_resource(path=startdir, level=level, filter_=filter_)

    response.set_data(tree)

    dict_ = response.to_dict()
    json_ = jsonify(dict_)
    code = response.code

    return json_, code
Пример #11
0
def test_assign_if_none():
    """assign_if_none falls back to the default only when the value is None."""
    cases = [
        ((None, 'foo'), 'foo'),   # None -> use the fallback
        (('foo', 'bar'), 'foo'),  # non-None -> keep the value
    ]
    for args, expected in cases:
        assert util.assign_if_none(*args) == expected
Пример #12
0
def write(path, pipeline=None):
    """Serialize *pipeline* (an empty list when None) as JSON to *path*."""
    pipeline = assign_if_none(pipeline, [])

    # Pin the encoding: JSON output must not depend on the platform's
    # default locale encoding.
    with open(path, mode='w', encoding='utf-8') as f:
        json.dump(pipeline, f, indent=4)
Пример #13
0
def test_assign_if_none():
    """assign_if_none returns its first argument unless that is None."""
    # PEP 8: single space after commas (the original column-aligned them).
    assert util.assign_if_none(None, 'foo') == 'foo'
    assert util.assign_if_none('foo', 'bar') == 'foo'
Пример #14
0
    def runner(self, cdat, heap_size = 16384, seed = None, verbose = True):
        """Execute the configured machine-learning pipeline over *cdat*.

        Flow: parse the input to ARFF, split train/test folds with Weka's
        StratifiedRemoveFolds, run the configured attribute-selection
        combinations, then build, serialize and evaluate every enabled
        classifier, collecting per-model summaries (confusion matrix and
        learning/ROC/PRC curve plots) into ``self.gist``. Stage statuses in
        ``self.stages`` and ``self.logs`` are updated as the pipeline runs.

        Parameters
        ----------
        cdat      : input data-set wrapper (project type; its ARFF export is
                    currently commented out — see note below).
        heap_size : JVM max heap size in megabytes.
        seed      : RNG seed for the fold split; random in [0, 1000] if None.
        verbose   : forwarded to the (disabled) ARFF export call.
        """
        self.set_status(Pipeline.RUNNING)

        self.logs.append('Initializing Pipeline')

        para = self.config

        self.logs.append('Reading Pipeline Configuration')

        head = ''
        name = get_rand_uuid_str()

        self.logs.append('Reading Input File')

        # Mark all preprocessing stages as running; the data-file stage also
        # supplies the input directory (head) and base file name (name).
        for i, stage in enumerate(self.stages):
            if stage.code in ('dat.fle', 'prp.bgc', 'prp.nrm', 'prp.pmc', 'prp.sum'):
                self.stages[i].status = Pipeline.RUNNING
            if stage.code ==  'dat.fle':
                head    = os.path.abspath(stage.value.path)
                name, _ = os.path.splitext(stage.value.name)

        self.logs.append('Parsing to ARFF')

        path = os.path.join(head, '{name}.arff'.format(name = name))
        # This bug, I don't know why, using Config.schema instead.
        # cdat.toARFF(path, express_config = para.Preprocess.schema, verbose = verbose)

        for i, stage in enumerate(self.stages):
            if stage.code in ('dat.fle', 'prp.bgc', 'prp.nrm', 'prp.pmc', 'prp.sum'):
                self.stages[i].status = Pipeline.COMPLETE

        self.logs.append('Saved ARFF at {path}'.format(path = path))
        self.logs.append('Splitting to Training and Testing Sets')

        JVM.start(max_heap_size = '{size}m'.format(size = heap_size))

        load = Loader(classname = 'weka.core.converters.ArffLoader')
        # data = load.load_file(path)
        # save =  Saver(classname = 'weka.core.converters.ArffSaver')
        # NOTE(review): debug override — a fixed iris.arff is loaded instead
        # of the ARFF produced above; restore the commented lines (and the
        # class-index assignment) before production use.
        data = load.load_file(os.path.join(head, 'iris.arff')) # For Debugging Purposes Only
        data.class_is_last() # For Debugging Purposes Only
        # data.class_index = cdat.iclss

        for i, stage in enumerate(self.stages):
            if stage.code == 'prp.kcv':
                self.stages[i].status = Pipeline.RUNNING

        self.logs.append('Splitting Training Set')

        # TODO - Check if this seed is worth it.
        seed = assign_if_none(seed, random.randint(0, 1000))
        opts = ['-S', str(seed), '-N', str(para.Preprocess.FOLDS)]
        # '-V' inverts the fold selection, so this filter yields the
        # training portion of the stratified split.
        wobj = Filter(classname = 'weka.filters.supervised.instance.StratifiedRemoveFolds', options = opts + ['-V'])
        wobj.inputformat(data)

        tran = wobj.filter(data)

        self.logs.append('Splitting Testing Set')

        # Reuse the same filter without '-V' to get the complementary folds.
        wobj.options = opts
        test = wobj.filter(data)

        for i, stage in enumerate(self.stages):
            if stage.code == 'prp.kcv':
                self.stages[i].status = Pipeline.COMPLETE

        self.logs.append('Performing Feature Selection')

        # NOTE(review): feat is accumulated below but never attached to
        # self.gist or returned — verify whether it should be persisted.
        feat = [ ]
        for comb in para.FEATURE_SELECTION:
            if comb.USE:
                for i, stage in enumerate(self.stages):
                    if stage.code == 'ats':
                        search    = stage.value.search.name
                        evaluator = stage.value.evaluator.name

                        if search == comb.Search.NAME and evaluator == comb.Evaluator.NAME:
                            self.stages[i].status = Pipeline.RUNNING

                # NOTE(review): 'options' here is a keyword argument to
                # str.format, which silently ignores it — the search and
                # evaluator OPTIONS are never actually applied.
                srch = ASSearch(classname = 'weka.attributeSelection.{classname}'.format(
                    classname = comb.Search.NAME,
                    options   = assign_if_none(comb.Search.OPTIONS, [ ])
                ))
                ewal = ASEvaluation(classname = 'weka.attributeSelection.{classname}'.format(
                    classname = comb.Evaluator.NAME,
                    options   = assign_if_none(comb.Evaluator.OPTIONS, [ ])
                ))

                attr = AttributeSelection()
                attr.search(srch)
                attr.evaluator(ewal)
                attr.select_attributes(tran)

                # Record which attributes this search/evaluator pair kept.
                meta = addict.Dict()
                meta.search    = comb.Search.NAME
                meta.evaluator = comb.Evaluator.NAME
                meta.features  = [tran.attribute(index).name for index in attr.selected_attributes]

                feat.append(meta)

                for i, stage in enumerate(self.stages):
                    if stage.code == 'ats':
                        search    = stage.value.search.name
                        evaluator = stage.value.evaluator.name

                        if search == comb.Search.NAME and evaluator == comb.Evaluator.NAME:
                            self.stages[i].status = Pipeline.COMPLETE

        models = [ ]
        for model in para.MODEL:
            if model.USE:
                summary         = addict.Dict()

                self.logs.append('Modelling {model}'.format(model = model.LABEL))

                summary.label   = model.LABEL
                summary.name    = model.NAME
                summary.options = assign_if_none(model.OPTIONS, [ ])

                for i, stage in enumerate(self.stages):
                    if stage.code == 'lrn' and stage.value.name == model.NAME:
                        self.stages[i].status = Pipeline.RUNNING

                # NOTE(review): iclass is rebound on every iteration — only
                # the LAST instance's class-index list survives and is used
                # for the ROC/PRC plots below; confirm this is intended.
                for i, instance in enumerate(data):
                    iclass = list(range(instance.num_classes))

                options    = assign_if_none(model.OPTIONS, [ ])
                classifier = Classifier(classname = 'weka.classifiers.{classname}'.format(classname = model.NAME), options = options)
                classifier.build_classifier(tran)

                # Persist the trained model next to the input data.
                serializer.write(os.path.join(head, '{name}.{classname}.model'.format(
                        name = name,
                    classname = model.NAME
                )), classifier)

                self.logs.append('Testing model {model}'.format(model = model.LABEL))

                evaluation       = Evaluation(tran)
                evaluation.test_model(classifier, test)

                summary.summary  = evaluation.summary()

                # Confusion matrix rendered as a seaborn heatmap, stored as
                # both raw values and a base64-encoded plot.
                frame  = pd.DataFrame(data = evaluation.confusion_matrix)
                axes   = sns.heatmap(frame, cbar = False, annot = True)
                b64str = get_b64_plot(axes)

                summary.confusion_matrix = addict.Dict({
                    'value': evaluation.confusion_matrix.tolist(),
                     'plot': b64str
                })

                self.logs.append('Plotting Learning Curve for {model}'.format(model = model.LABEL))

                buffer = io.BytesIO()
                plot_classifier_errors(evaluation.predictions, tran, test, outfile = buffer, wait = False)
                b64str = buffer_to_b64(buffer)

                summary.learning_curve   = b64str

                buffer = io.BytesIO()
                plot_roc(evaluation, class_index = iclass, outfile = buffer, wait = False)
                b64str = buffer_to_b64(buffer)

                summary.roc_curve        = b64str

                buffer = io.BytesIO()
                plot_prc(evaluation, class_index = iclass, outfile = buffer, wait = False)
                b64str = buffer_to_b64(buffer)

                summary.prc_curve        = b64str

                # Presumably only graph-capable classifiers (e.g. trees)
                # expose a non-empty graph — TODO confirm against the
                # python-weka-wrapper API.
                if classifier.graph:
                    summary.graph = classifier.graph

                # NOTE(review): per-instance predictions are computed but the
                # result is discarded each iteration; either collect them
                # into the summary or remove the loop.
                for i, instance in enumerate(test):
                    prediction = classifier.classify_instance(instance)

                for i, stage in enumerate(self.stages):
                    if stage.code == 'lrn' and stage.value.name == model.NAME:
                        self.stages[i].status = Pipeline.COMPLETE

                models.append(summary)

        self.gist.models = models

        JVM.stop()

        # Persist the aggregated results next to the input data.
        JSON.write(os.path.join(head, '{name}.cgist'.format(name = name)), self.gist)

        self.logs.append('Pipeline Complete')

        self.set_status(Pipeline.COMPLETE)
Пример #15
0
def write(path, pipeline=None):
    """Serialize *pipeline* (an empty list when None) as JSON to *path*."""
    # PEP 8: no spaces around '=' in defaults/keyword arguments.
    pipeline = assign_if_none(pipeline, [])

    # Pin the encoding: JSON output must not depend on the platform's
    # default locale encoding.
    with open(path, mode='w', encoding='utf-8') as f:
        json.dump(pipeline, f, indent=4)