def i_create_all_resources_to_evaluate_with_model_and_map(step, data=None, fields_map=None, output=None):
    """Evaluate the current model on a test file using a fields map.

    Builds and runs a ``bigmler --evaluate`` command and records the
    output directory and file in ``world`` for later checks.
    """
    ok_(data is not None and fields_map is not None and output is not None)
    command = ("bigmler --evaluate --test " + res_filename(data) +
               " --model " + world.model["resource"] +
               " --output " + output +
               " --fields-map " + res_filename(fields_map))
    command = check_debug(command)
    try:
        retcode = check_call(command, shell=True)
        # A negative return code means the process was killed by a signal.
        ok_(retcode >= 0)
        world.directory = os.path.dirname(output)
        world.folders.append(world.directory)
        world.output = output
    except OSError as exc:
        # Surface the failure reason instead of a bare assert False.
        assert False, str(exc)
def i_create_all_anomaly_resources(step, data=None, test=None, output=None):
    """Create anomaly detector resources from train/test files via bigmler."""
    # ok_ gives a real failure message; bare "assert False" did not.
    ok_(data is not None and test is not None and output is not None)
    test = res_filename(test)
    command = ("bigmler anomaly --train " + res_filename(data) +
               " --test " + test +
               " --store --output " + output)
    shell_execute(command, output, test=test)
def i_create_all_mc_resources(step, data, max_categories=None, objective=None, test=None, output=None):
    """Create max-categories model resources and predictions.

    Runs bigmler with ``--max-categories`` and records the number of test
    lines (minus the headers row) and the output path in ``world``.
    """
    ok_(max_categories is not None and test is not None and
        output is not None and objective is not None)
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    test = res_filename(test)
    try:
        command = ("bigmler --train " + res_filename(data) +
                   " --max-categories " + max_categories +
                   " --objective " + objective +
                   " --test " + test +
                   " --store --output " + output)
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        # A negative return code means the process died on a signal.
        ok_(retcode >= 0)
        world.test_lines = file_number_of_lines(test)
        # test file has headers in it, so first line must be ignored
        world.test_lines -= 1
        world.output = output
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_all_ml_resources(step, tag=None, label_separator=None, number_of_labels=None, data=None, training_separator=None, test=None, output=None):
    """Create multi-label model resources and predictions.

    Validates the required arguments, runs the bigmler multi-label command
    and records the expected number of models and test lines in ``world``.
    """
    ok_(tag is not None and label_separator is not None and
        training_separator is not None and number_of_labels is not None and
        data is not None and test is not None and output is not None)
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    # One model is built per label.
    world.number_of_models = int(number_of_labels)
    test = res_filename(test)
    try:
        command = ("bigmler --multi-label --train " + res_filename(data) +
                   " --label-separator \"" + label_separator +
                   "\" --training-separator \"" + training_separator +
                   "\" --test " + test +
                   " --store --output " + output +
                   " --tag " + tag + " --max-batch-models 1")
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        # A negative return code means the process died on a signal.
        ok_(retcode >= 0)
        world.test_lines = file_number_of_lines(test)
        # test file has headers in it, so first line must be ignored
        world.test_lines -= 1
        world.output = output
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_all_lr_resources(step, data=None, test=None, output=None):
    """Build linear regression resources and predictions for a test file."""
    ok_(data is not None and test is not None and output is not None)
    test_path = res_filename(test)
    command = ("bigmler linear-regression --train %s --test %s"
               " --store --no-bias --default-numeric-value mean --output %s" %
               (res_filename(data), test_path, output))
    shell_execute(command, output, test=test_path)
def i_create_all_lr_resources(step, data=None, test=None, output=None):
    """Build logistic regression resources and predictions for a test file."""
    ok_(data is not None and test is not None and output is not None)
    test_path = res_filename(test)
    command = ("bigmler logistic-regression --train %s --test %s"
               " --store --no-balance-fields --no-bias --output %s" %
               (res_filename(data), test_path, output))
    shell_execute(command, output, test=test_path)
def i_create_all_mc_resources(step, data, max_categories=None, objective=None, test=None, output=None):
    """Create max-categories model resources and predictions.

    Validates arguments with ``ok_`` (instead of messageless asserts),
    runs the bigmler command, and stores the test line count (headers row
    excluded) and output path in ``world``.
    """
    ok_(max_categories is not None and test is not None and
        output is not None and objective is not None)
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    test = res_filename(test)
    try:
        command = ("bigmler --train " + res_filename(data) +
                   " --max-categories " + max_categories +
                   " --objective " + objective +
                   " --test " + test +
                   " --store --output " + output)
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        # Negative return codes indicate the process was killed.
        ok_(retcode >= 0)
        world.test_lines = file_number_of_lines(test)
        # test file has headers in it, so first line must be ignored
        world.test_lines -= 1
        world.output = output
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_all_cluster_resources(step, data=None, test=None, output=None):
    """Build a k=8 cluster and centroids for a test file."""
    ok_(data is not None and test is not None and output is not None)
    test_path = res_filename(test)
    command = ("bigmler cluster --train %s --test %s --k 8"
               " --store --output %s" %
               (res_filename(data), test_path, output))
    shell_execute(command, output, test=test_path)
def i_create_all_dn_resources(step, data=None, test=None, output=None):
    """Build deepnet resources and predictions for a test file."""
    ok_(data is not None and test is not None and output is not None)
    test_path = res_filename(test)
    command = ("bigmler deepnet --train %s --test %s --store --output %s" %
               (res_filename(data), test_path, output))
    shell_execute(command, output, test=test_path)
def i_create_all_resources_to_test_from_stdin(step, data=None, test=None, name=None, output=None):
    """Pipe the test file through stdin into a bigmler prediction run."""
    # ok_ replaces the messageless "assert False" validation.
    ok_(data is not None and test is not None and
        output is not None and name is not None)
    test = res_filename(test)
    command = ("cat " + test + "|bigmler --train " + res_filename(data) +
               " --test --store --output " + output +
               " --name \"" + name + "\" --max-batch-models 1")
    shell_execute(command, output, test=test)
def i_create_all_pca_resources_with_no_headers(step, data=None, test=None, output=None):
    """Build PCA resources from headerless train and test files."""
    ok_(data is not None and test is not None and output is not None)
    test_file = res_filename(test)
    parts = ["bigmler pca --train ", res_filename(data),
             " --test ", test_file,
             " --store --output ", output,
             " --no-train-header --no-test-header"]
    shell_execute("".join(parts), output, test=test_file,
                  options="--projection-header")
def i_create_all_cluster_resources_with_mapping(step, data=None, test=None, fields_map=None, output=None):
    """Build clusters remotely using an explicit train/test fields map."""
    ok_(data is not None and test is not None and
        output is not None and fields_map is not None)
    test_file = res_filename(test)
    pieces = ["bigmler cluster --remote --train ", res_filename(data),
              " --test ", test_file, " --k 8",
              " --fields-map ", res_filename(fields_map),
              " --store --output ", output]
    shell_execute("".join(pieces), output, test=test_file)
def i_create_all_cluster_resources_to_dataset(step, data=None, test=None, output_dir=None):
    """Build clusters remotely, writing centroids to a dataset (no CSV)."""
    ok_(data is not None and test is not None and output_dir is not None)
    test_file = res_filename(test)
    parts = ["bigmler cluster --remote --train ", res_filename(data),
             " --test ", test_file, " --k 8",
             " --to-dataset --no-csv ", " --store --output-dir ", output_dir]
    shell_execute("".join(parts), "%s/x.csv" % output_dir, test=test_file)
def i_create_all_dn_resources_headers(step, data=None, test=None, output=None):
    """Build deepnet predictions with full prediction info and headers."""
    ok_(data is not None and test is not None and output is not None)
    test_file = res_filename(test)
    cmd = "".join(["bigmler deepnet --train ", res_filename(data),
                   " --test ", test_file,
                   " --store --prediction-header --prediction-info full",
                   " --output ", output])
    shell_execute(cmd, output, test=test_file, options='--prediction-header')
def i_create_all_anomaly_resources_with_mapping(step, data=None, test=None, fields_map=None, output=None):
    """Create anomaly resources remotely using a train/test fields map."""
    # ok_ replaces the messageless "assert False" validation.
    ok_(data is not None and test is not None and
        output is not None and fields_map is not None)
    test = res_filename(test)
    command = ("bigmler anomaly --remote --train " + res_filename(data) +
               " --test " + test +
               " --fields-map " + res_filename(fields_map) +
               " --store --output " + output)
    shell_execute(command, output, test=test)
def i_create_all_cluster_resources_with_prediction_fields(step, data=None, test=None, prediction_fields=None, output=None):
    """Build clusters remotely keeping only selected prediction fields."""
    ok_(data is not None and test is not None and
        output is not None and prediction_fields is not None)
    test_file = res_filename(test)
    cmd = "".join(["bigmler cluster --remote --train ", res_filename(data),
                   " --test ", test_file, " --k 8",
                   " --prediction-fields \"", prediction_fields,
                   "\" --prediction-info full --prediction-header --store ",
                   "--output ", output])
    shell_execute(cmd, output, test=test_file, options='--prediction-header')
def i_create_dn_resources_from_model_remote_with_options(
        step, test=None, options_file=None, output=None):
    """Score a test file remotely with the stored deepnet and batch options."""
    ok_(test is not None and output is not None and options_file is not None)
    test_file = res_filename(test)
    attrs_file = res_filename(options_file)
    cmd = "".join(["bigmler deepnet --deepnet ", world.deepnet['resource'],
                   " --test ", test_file,
                   " --batch-prediction-attributes ", attrs_file,
                   " --store --remote --output ", output])
    shell_execute(cmd, output, test=test_file)
def i_create_all_execution_with_io_resources(step, code_file=None, output_dir=None, inputs_dec=None, outputs_dec=None, inputs=None):
    """Run a WhizzML script execution declaring inputs and outputs."""
    ok_(code_file is not None and output_dir is not None and
        inputs_dec is not None and outputs_dec is not None and
        inputs is not None)
    parts = ["bigmler execute --code-file ", res_filename(code_file),
             " --store --declare-inputs ", res_filename(inputs_dec),
             " --declare-outputs ", res_filename(outputs_dec),
             " --inputs ", res_filename(inputs),
             " --output-dir ", output_dir]
    shell_execute("".join(parts), "%s/xx.txt" % output_dir)
def i_create_dn_resources_from_model_remote_with_options(step, test=None, options_file=None, output=None):
    """Remote batch prediction from a stored deepnet using an options file."""
    ok_(test is not None and output is not None and options_file is not None)
    local_test = res_filename(test)
    local_options = res_filename(options_file)
    command = ("bigmler deepnet --deepnet %s --test %s"
               " --batch-prediction-attributes %s"
               " --store --remote --output %s" %
               (world.deepnet['resource'], local_test, local_options, output))
    shell_execute(command, output, test=local_test)
def i_create_resources_from_model_with_op_remote(step, operating_point=None, test=None, output=None):
    """Remote predictions from the current model at an operating point."""
    ok_(operating_point is not None and test is not None and output is not None)
    test_file = res_filename(test)
    op_file = res_filename(operating_point)
    command = ("bigmler --model %s --test %s --operating-point %s"
               " --store --remote --output %s --max-batch-models 1" %
               (world.model['resource'], test_file, op_file, output))
    shell_execute(command, output, test=test_file)
def i_create_fs_resources_from_model_remote_with_options(
        step, test=None, output=None, options_file=None):
    """Remote fusion predictions from model + deepnet with batch options."""
    ok_(test is not None and output is not None and options_file is not None)
    test_file = res_filename(test)
    attrs = res_filename(options_file)
    fusion_models = ",".join([world.model["resource"],
                              world.deepnet["resource"]])
    command = ("bigmler fusion --fusion-models " + fusion_models +
               " --test \"" + test_file + "\"" +
               " --batch-prediction-attributes " + attrs +
               " --store --remote --output " + output)
    shell_execute(command, output, test=test_file)
def i_create_all_resources_to_model_with_source_attrs(
        self, data=None, source_attributes=None, output=None):
    """Create a model, optionally applying source attributes from a file."""
    ok_(data is not None and source_attributes is not None and
        output is not None)
    # An empty source_attributes means "no --source-attributes option".
    attrs_option = ""
    if source_attributes != "":
        attrs_option = " --source-attributes " + \
            res_filename(source_attributes)
    command = ("bigmler --train " + res_filename(data) +
               " --output " + output + attrs_option +
               " --store --max-batch-models 1 --no-fast")
    shell_execute(command, output)
def i_create_all_cluster_resources_with_prediction_fields(
        step, data=None, test=None, prediction_fields=None, output=None):
    """Remote clustering keeping only the requested prediction fields."""
    ok_(data is not None and test is not None and
        output is not None and prediction_fields is not None)
    test_path = res_filename(test)
    command = ("bigmler cluster --remote --train %s --test %s --k 8"
               " --prediction-fields \"%s\" --prediction-info full"
               " --prediction-header --store --output %s" %
               (res_filename(data), test_path, prediction_fields, output))
    shell_execute(command, output, test=test_path,
                  options='--prediction-header')
def i_create_fs_resources_from_model_remote_with_options(step, test=None, output=None, options_file=None):
    """Remote fusion batch predictions using a prediction-attributes file."""
    ok_(test is not None and output is not None and options_file is not None)
    test_path = res_filename(test)
    options_path = res_filename(options_file)
    model_ids = [world.model["resource"], world.deepnet["resource"]]
    command = ("bigmler fusion --fusion-models %s --test \"%s\""
               " --batch-prediction-attributes %s"
               " --store --remote --output %s" %
               (",".join(model_ids), test_path, options_path, output))
    shell_execute(command, output, test=test_path)
def i_create_all_resources_to_test_from_stdin(step, data=None, test=None, name=None, output=None):
    """Pipe the test file through stdin into a named bigmler prediction run."""
    # ok_ replaces the messageless "assert False" validation.
    ok_(data is not None and test is not None and
        output is not None and name is not None)
    test = res_filename(test)
    if not PYTHON3:
        # Python 2: decode byte strings before joining with unicode literals.
        name = name.decode("utf-8")
    command = (CAT + test + u"|bigmler --train " + res_filename(data) +
               u" --test --store --output " + output +
               u" --name \"" + name + u"\" --max-batch-models 1")
    shell_execute(command, output, test=test)
def i_create_lr_resources_from_model_with_op(step, test=None, output=None, operating_point=None):
    """Predictions from the stored logistic regression at an operating point."""
    ok_(test is not None and output is not None
        and operating_point is not None)
    test_path = res_filename(test)
    op_path = res_filename(operating_point)
    command = ("bigmler logistic-regression --logistic-regression %s"
               " --test %s --operating-point %s"
               " --store --no-balance-fields --no-bias --output %s" %
               (world.logistic_regression['resource'], test_path, op_path,
                output))
    shell_execute(command, output, test=test_path)
def i_create_all_execution_with_io_resources(step, code_file=None, output_dir=None, inputs_dec=None, outputs_dec=None, inputs=None):
    """Execute a WhizzML code file with declared inputs/outputs."""
    ok_(code_file is not None and output_dir is not None and
        inputs_dec is not None and outputs_dec is not None and
        inputs is not None)
    command = ("bigmler execute --code-file %s --store"
               " --declare-inputs %s --declare-outputs %s --inputs %s"
               " --output-dir %s" %
               (res_filename(code_file), res_filename(inputs_dec),
                res_filename(outputs_dec), res_filename(inputs), output_dir))
    shell_execute(command, "%s/xx.txt" % output_dir)
def create_dataset_from_batch_anomaly(filename, output=None, args=None):
    """Create a dataset from a batch anomaly score's output dataset.

    Uploads `filename` as a source, builds a dataset and an anomaly
    detector, scores the dataset remotely (output_dataset=True) and
    updates the resulting dataset with `args`. Every created resource is
    registered in `world` for later teardown.
    """
    source = world.api.create_source(res_filename(filename))
    world.source = source
    world.directory = os.path.dirname(output)
    world.output = output
    world.api.ok(world.source)
    world.sources.append(source['resource'])
    world.dataset = world.api.create_dataset(source)
    world.api.ok(world.dataset)
    world.datasets.append(world.dataset['resource'])
    # Fixed seeds make the anomaly detector deterministic across runs.
    world.anomaly = world.api.create_anomaly(world.dataset, {
        "seed": "bigml", "anomaly_seed": "bigml" })
    world.api.ok(world.anomaly)
    world.anomalies.append(world.anomaly['resource'])
    world.batch_anomaly_score = world.api.create_batch_anomaly_score( \
        world.anomaly, world.dataset, {"output_dataset": True})
    world.api.ok(world.batch_anomaly_score)
    world.batch_anomaly_scores.append(world.batch_anomaly_score['resource'])
    world.batch_anomaly_score_dataset = world.api.get_dataset(
        world.batch_anomaly_score['object']['output_dataset_resource'])
    world.api.ok(world.batch_anomaly_score_dataset)
    world.batch_anomaly_score_dataset = world.api.update_dataset( \
        world.batch_anomaly_score_dataset, args)
    world.api.ok(world.batch_anomaly_score_dataset)
    world.datasets.append(world.batch_anomaly_score_dataset['resource'])
def create_evaluation_split(filename, output=None, args=None):
    """Build an evaluation over a 70/30 train/test split.

    Uploads `filename`, splits the dataset with a fixed seed into a 70%
    training sample and its out-of-bag complement, trains a model on the
    training part and evaluates it on the held-out part using `args`.
    """
    source = world.api.create_source( \
        res_filename(filename), {"project": world.project_id})
    world.source = source
    world.directory = os.path.dirname(output)
    world.output = output
    world.api.ok(world.source)
    world.sources.append(source['resource'])
    world.dataset = world.api.create_dataset(source)
    world.api.ok(world.dataset)
    world.datasets.append(world.dataset['resource'])
    # Deterministic 70% sample for training.
    world.dataset_train = world.api.create_dataset(world.dataset, { \
        'sample_rate': 0.7, 'seed': 'BigML'})
    world.api.ok(world.dataset_train)
    world.datasets.append(world.dataset_train['resource'])
    # Same seed with out_of_bag=True yields the complementary 30%.
    world.dataset_test = world.api.create_dataset(world.dataset, { \
        'sample_rate': 0.7, 'seed': 'BigML', 'out_of_bag': True})
    world.api.ok(world.dataset_test)
    world.datasets.append(world.dataset_test['resource'])
    world.model = world.api.create_model(world.dataset_train)
    world.api.ok(world.model)
    world.models.append(world.model['resource'])
    world.evaluation = world.api.create_evaluation( \
        world.model, world.dataset_test, args)
    world.api.ok(world.evaluation)
    world.evaluations.append(world.evaluation['resource'])
def i_create_pca_resources_from_source( \
        step, test=None, output=None):
    """Build PCA projections starting from the stored source resource."""
    ok_(test is not None and output is not None)
    test_path = res_filename(test)
    command = ("bigmler pca --source %s --test %s --store --output %s" %
               (world.source['resource'], test_path, output))
    shell_execute(command, output, test=test_path)
def i_create_source_from_stdin(step, data=None, output_dir=None):
    """Create only a source by piping the data file into bigmler's stdin."""
    # ok_ replaces the messageless "assert False" validation.
    ok_(data is not None and output_dir is not None)
    command = (CAT + res_filename(data) + u"|bigmler --train " +
               u"--store --no-dataset --no-model --output-dir " +
               output_dir + u" --max-batch-models 1")
    shell_execute(command, output_dir + "/test", test=None)
def i_check_anomaly_scores(step, check_file):
    """Compare the anomaly scores output file against a reference file.

    Rows are compared cell by cell; cells that look like floats are
    rounded to the shortest number of decimal places present in either
    file before comparison. (Python 2 code: `.next()`, print statement.)
    """
    check_file = res_filename(check_file)
    predictions_file = world.output
    try:
        # "U" opens with universal newlines (Python 2 style).
        predictions_file = csv.reader(open(predictions_file, "U"),
                                      lineterminator="\n")
        check_file = csv.reader(open(check_file, "U"), lineterminator="\n")
        for row in predictions_file:
            check_row = check_file.next()
            if len(check_row) != len(row):
                assert False
            for index in range(len(row)):
                dot = row[index].find(".")
                # Round only when either value looks like a float.
                if dot > 0 or (check_row[index].find(".") > 0
                               and check_row[index].endswith(".0")):
                    try:
                        decimal_places = min(len(row[index]),
                                             len(check_row[index])) - dot - 1
                        row[index] = round(float(row[index]), decimal_places)
                        check_row[index] = round(float(check_row[index]),
                                                 decimal_places)
                    except ValueError:
                        # Non-numeric cell: fall through to exact comparison.
                        pass
                if check_row[index] != row[index]:
                    print row, check_row
                    assert False
        assert True
    except Exception, exc:
        assert False, str(exc)
def i_create_all_anomaly_resources_with_test_split(step, data=None, test_split=None, output=None):
    """Create anomaly resources using a remote train/test split."""
    # ok_ replaces the messageless "assert False" validation.
    ok_(data is not None and output is not None and test_split is not None)
    data = res_filename(data)
    command = ("bigmler anomaly --remote --train " + data +
               " --test-split " + test_split +
               " --store --output " + output)
    shell_execute(command, output, data=data, test_split=test_split)
def i_create_cluster_resources_from_clusters_file(step, clusters_file=None, test=None, output=None):
    """Compute centroids for a test file from a clusters list file."""
    ok_(test is not None and output is not None and clusters_file is not None)
    test_path = res_filename(test)
    # NOTE(review): clusters_file is used as-is, not through res_filename —
    # presumably it is already a usable path; confirm against callers.
    command = ("bigmler cluster --clusters %s --test %s --store --output %s" %
               (clusters_file, test_path, output))
    shell_execute(command, output, test=test_path)
def i_create_cluster_resources_from_cluster(step, test=None, output=None):
    """Compute centroids for a test file from the stored cluster."""
    ok_(test is not None and output is not None)
    test_path = res_filename(test)
    command = ("bigmler cluster --cluster %s --test %s --k 8"
               " --store --output %s" %
               (world.cluster['resource'], test_path, output))
    shell_execute(command, output, test=test_path)
def i_create_anomaly_resources_with_options(step, data=None, options=None, output_dir=None):
    """Create an anomaly detector with extra CLI options and an anomalies dataset."""
    # ok_ replaces the messageless "assert False" validation.
    ok_(data is not None and output_dir is not None and options is not None)
    command = ("bigmler anomaly --train " + res_filename(data) +
               " " + options +
               " --anomalies-dataset in --store --output-dir " + output_dir)
    shell_execute(command, "%s/x.csv" % output_dir, data=data)
def create_dataset_from_batch_anomaly(filename, output=None, args=None):
    """Create a dataset from a batch anomaly score's output dataset.

    Uploads `filename` as a source, builds a dataset and a seeded anomaly
    detector, scores the dataset remotely (output_dataset=True), then
    updates the resulting dataset with `args`, registering every resource
    in `world`.
    """
    source = world.api.create_source(res_filename(filename))
    world.source = source
    world.directory = os.path.dirname(output)
    world.output = output
    world.api.ok(world.source)
    world.sources.append(source['resource'])
    world.dataset = world.api.create_dataset(source)
    world.api.ok(world.dataset)
    world.datasets.append(world.dataset['resource'])
    # Fixed seeds keep the anomaly detector deterministic across runs.
    world.anomaly = world.api.create_anomaly(world.dataset,
                                             {"seed": "bigml",
                                              "anomaly_seed": "bigml"})
    world.api.ok(world.anomaly)
    world.anomalies.append(world.anomaly['resource'])
    world.batch_anomaly_score = world.api.create_batch_anomaly_score( \
        world.anomaly, world.dataset, {"output_dataset": True})
    world.api.ok(world.batch_anomaly_score)
    world.batch_anomaly_scores.append(world.batch_anomaly_score['resource'])
    world.batch_anomaly_score_dataset = world.api.get_dataset(
        world.batch_anomaly_score['object']['output_dataset_resource'])
    world.api.ok(world.batch_anomaly_score_dataset)
    world.batch_anomaly_score_dataset = world.api.update_dataset( \
        world.batch_anomaly_score_dataset, args)
    world.api.ok(world.batch_anomaly_score_dataset)
    world.datasets.append(world.batch_anomaly_score_dataset['resource'])
def create_dataset_from_dataset_from_batch_centroid(filename, output=None, args=None):
    """Create a dataset derived from a batch centroid's output dataset.

    Uploads `filename`, clusters the dataset with a fixed seed, runs a
    batch centroid whose output is a dataset, and finally builds a new
    dataset from that output using `args`. All created resources are
    registered in `world`.
    """
    source = world.api.create_source(res_filename(filename))
    world.source = source
    world.directory = os.path.dirname(output)
    world.output = output
    world.api.ok(world.source)
    world.sources.append(source['resource'])
    world.dataset = world.api.create_dataset(source)
    world.api.ok(world.dataset)
    world.datasets.append(world.dataset['resource'])
    # Fixed seed keeps clustering deterministic across runs.
    world.cluster = world.api.create_cluster( \
        world.dataset, {"cluster_seed": "bigml"})
    world.api.ok(world.cluster)
    world.clusters.append(world.cluster['resource'])
    world.batch_centroid = world.api.create_batch_centroid( \
        world.cluster, world.dataset, {"output_dataset": True})
    world.api.ok(world.batch_centroid)
    world.batch_centroids.append(world.batch_centroid['resource'])
    world.batch_centroid_dataset = world.api.get_dataset(
        world.batch_centroid['object']['output_dataset_resource'])
    world.api.ok(world.batch_centroid_dataset)
    world.datasets.append(world.batch_centroid_dataset['resource'])
    world.dataset = world.api.create_dataset( \
        world.batch_centroid_dataset['resource'], args)
    world.api.ok(world.dataset)
    world.datasets.append(world.dataset['resource'])
def i_create_source_with_project(step, data=None, project=None, output_dir=None):
    """Create only a source inside a freshly-named project.

    If a project with the requested name already exists, the name is
    doubled ("name name", ...) until an unused name is found, then a
    bigmler run creates the source under that project.
    (Python 2 code: print statements, unicode literals.)
    """
    if data is None:
        assert False
    world.directory = output_dir
    world.folders.append(world.directory)
    # Check if the project already exists
    previous_projects = world.api.list_projects('name=%s' % project)
    while previous_projects['meta']['total_count'] > 0:
        print "the project %s already exists, trying with:" % project
        project += " " + project
        print project
        previous_projects = world.api.list_projects('name=%s' % project)
    try:
        command = (u"bigmler --train " + res_filename(data) +
                   u" --no-model --no-dataset --store --output-dir " +
                   output_dir + u" --project=\"" + project + "\"")
        if not PYTHON3:
            # Python 2: encode the unicode command before shelling out.
            command = command.encode(SYSTEM_ENCODING)
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        else:
            world.output = output_dir
            assert True
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_source_with_project(step, data=None, project=None, output_dir=None):
    """Create only a source inside a freshly-named project (ok_ variant).

    Doubles the project name until it is unused, then runs bigmler to
    create the source under that project.
    (Python 2 code: print statements, unicode literals.)
    """
    ok_(data is not None)
    world.directory = output_dir
    world.folders.append(world.directory)
    # Check if the project already exists
    previous_projects = world.api.list_projects('name=%s' % project)
    while previous_projects['meta']['total_count'] > 0:
        print "the project %s already exists, trying with:" % project
        project += " " + project
        print project
        previous_projects = world.api.list_projects('name=%s' % project)
    try:
        command = (u"bigmler --train " + res_filename(data) +
                   u" --no-model --no-dataset --store --output-dir " +
                   output_dir + u" --project=\"" + project + "\"")
        if not PYTHON3:
            # Python 2: encode the unicode command before shelling out.
            command = command.encode(SYSTEM_ENCODING)
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        else:
            world.output = output_dir
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_source_from_file(step, data=None, output_dir=None):
    """Create only a source resource from a local training file."""
    ok_(data is not None and output_dir is not None)
    # The original command passed "--store" twice; once is enough.
    command = ("bigmler --train " + res_filename(data) +
               " --store --output-dir " + output_dir +
               " --no-dataset --no-model")
    shell_execute(command, os.path.join(output_dir, "p.csv"), test=None,
                  project=False)
def i_check_topic_distributions(step, check_file):
    """Compare topic distribution predictions against a reference CSV.

    Numeric-looking cells are rounded to a common number of decimal
    places and compared with assert_almost_equal; all other cells must
    match exactly. (Python 2 code: `.next()`, `except X, e` syntax.)
    """
    check_file = res_filename(check_file)
    predictions_file = world.output
    import traceback
    try:
        with UnicodeReader(predictions_file) as predictions_file:
            with UnicodeReader(check_file) as check_file:
                for row in predictions_file:
                    check_row = check_file.next()
                    assert len(check_row) == len(row)
                    for index in range(len(row)):
                        dot = row[index].find(".")
                        decimal_places = 1
                        # Round only when either value looks like a float.
                        if dot > 0 or (check_row[index].find(".") > 0 and
                                       check_row[index].endswith(".0")):
                            try:
                                decimal_places = min( \
                                    len(row[index]),
                                    len(check_row[index])) - dot - 1
                                row[index] = round(float(row[index]),
                                                   decimal_places)
                                check_row[index] = round(
                                    float(check_row[index]), decimal_places)
                            except ValueError:
                                # Non-numeric after all: compare loosely with
                                # the default precision.
                                decimal_places = 1
                            assert_almost_equal(check_row[index], row[index],
                                                places=(decimal_places - 1))
                        else:
                            assert_equal(check_row[index], row[index])
    except Exception, exc:
        assert False, traceback.format_exc()