def train_model(self, inputs, outputs, train):
    """Dumps the training rows to a local CSV and builds a BigML model.

    Returns [source, dataset, model, local_model] for the trained data.
    """
    # Create a file with the trained data; `with` guarantees the handle is
    # closed even if a row fails to serialize (original leaked it).
    with open("./data_train.csv", "w") as train_handler:
        for x0, y0 in zip(inputs[train], outputs[train]):
            y0 = np.array(y0)
            # append the output value as the last CSV column
            line = ",".join(np.insert(x0, len(x0), y0))
            train_handler.write(line + "\n")
    # Use the training file created previously to train a BigML model
    source = check_resource(
        self.api.create_source('./data_train.csv', {
            'term_analysis': {"enabled": False},
            'source_parser': {"locale": "en-US"}}),
        self.api.get_source)
    dataset = check_resource(self.api.create_dataset(source),
                             self.api.get_dataset)
    model = check_resource(self.api.create_model(dataset),
                           self.api.get_model)
    local_model = Model(model)
    return [source, dataset, model, local_model]
def i_check_create_evaluations(step, number_of_evaluations=None):
    """Waits until the evaluations file has the expected number of lines and
    verifies each evaluation id against the API.
    """
    if number_of_evaluations is not None:
        world.number_of_evaluations = int(number_of_evaluations)
    evaluations_file = "%s%sevaluations" % (world.directory, os.sep)
    evaluation_ids = []
    number_of_lines = 0
    count = 0
    while world.number_of_evaluations != number_of_lines and count < 10:
        number_of_lines = 0
        # fixed: reset per retry so ids are not duplicated across passes
        evaluation_ids = []
        for line in open(evaluations_file, "r"):
            number_of_lines += 1
            evaluation_ids.append(line.strip())
        if world.number_of_evaluations != number_of_lines:
            time.sleep(10)
            count += 1
    if world.number_of_evaluations != number_of_lines:
        # fixed: message formerly referenced undefined `evaluation_file`,
        # raising NameError instead of the intended assertion message
        assert False, ("number of evaluations %s and number of lines in"
                       " evaluations file %s: %s" %
                       (world.number_of_evaluations, evaluations_file,
                        number_of_lines))
    world.evaluation_ids = evaluation_ids
    for evaluation_id in evaluation_ids:
        try:
            check_resource(evaluation_id, world.api.get_evaluation)
            world.evaluations.append(evaluation_id)
        except Exception as exc:
            assert False, str(exc)
def i_check_create_models_in_ensembles(step, in_ensemble=True):
    """Waits until the models file has the expected number of lines and
    verifies each model; registers its ensemble when `in_ensemble` is set.
    """
    model_file = "%s%smodels" % (world.directory, os.sep)
    # fixed: define before the loop — the original only bound model_ids
    # inside the while body, so it was undefined when the counts matched
    # on entry and the loop never ran
    model_ids = []
    number_of_lines = 0
    count = 0
    while world.number_of_models != number_of_lines and count < 10:
        number_of_lines = 0
        model_ids = []
        for line in open(model_file, "r"):
            number_of_lines += 1
            model_ids.append(line.strip())
        if world.number_of_models != number_of_lines:
            time.sleep(10)
            count += 1
    if world.number_of_models != number_of_lines:
        assert False, ("number of models %s and number of lines in models"
                       " file %s: %s" % (world.number_of_models, model_file,
                                         number_of_lines))
    world.model_ids = model_ids
    for model_id in model_ids:
        try:
            model = check_resource(model_id, world.api.get_model)
            if in_ensemble:
                ensemble_id = "ensemble/%s" % model['object']['ensemble_id']
                if ensemble_id not in world.ensembles:
                    world.ensembles.append(ensemble_id)
            else:
                world.models.append(model_id)
        except Exception as exc:
            assert False, str(exc)
def retrieve_resource(api, resource_id, query_string=ONLY_MODEL,
                      no_check_fields=False):
    """ Retrieves resource info either from a local repo or from
        the remote server
    """
    if api.storage is not None:
        try:
            # local cache path: "storage/resource_type_resource_number"
            stored_resource = "%s%s%s" % (api.storage, os.sep,
                                          resource_id.replace("/", "_"))
            with open(stored_resource) as resource_file:
                resource = json.loads(resource_file.read())
            # we check that the stored resource has enough fields information
            # for local predictions to work. Otherwise we should retrieve it.
            if no_check_fields or check_model_fields(resource):
                return resource
        except ValueError:
            # fixed: the format string had no argument, so the error text
            # contained a literal "%s" instead of the offending path
            raise ValueError("The file %s contains no JSON" % stored_resource)
        except IOError:
            # no local copy: fall through to the remote download
            pass
    if api.auth == '?username=;api_key=;':
        raise ValueError("The credentials information is missing. This"
                         " information is needed to download resource %s"
                         " for the first time and store it locally for further"
                         " use. Please export BIGML_USERNAME"
                         " and BIGML_API_KEY." % resource_id)
    api_getter = api.getters[get_resource_type(resource_id)]
    resource = check_resource(resource_id, api_getter, query_string)
    return resource
def field_attribute_value(step, field=None, attribute=None,
                          attribute_value=None):
    """Checks that a dataset field attribute holds the expected value."""
    dataset = check_resource(world.dataset['resource'], world.api.get_dataset)
    dataset_fields = dataset['object']['fields']
    eq_(dataset_fields[field][attribute], attribute_value)
def i_check_create_evaluations(step, number_of_evaluations=None):
    """Waits until the evaluations file has the expected number of lines and
    verifies each evaluation id against the API.
    """
    if number_of_evaluations is not None:
        world.number_of_evaluations = int(number_of_evaluations)
    evaluations_file = "%s%sevaluations" % (world.directory, os.sep)
    evaluation_ids = []
    number_of_lines = 0
    count = 0
    while world.number_of_evaluations != number_of_lines and count < 10:
        number_of_lines = 0
        # fixed: reset per retry so ids are not duplicated across passes
        evaluation_ids = []
        for line in open(evaluations_file, "r"):
            number_of_lines += 1
            evaluation_ids.append(line.strip())
        if world.number_of_evaluations != number_of_lines:
            time.sleep(10)
            count += 1
    if world.number_of_evaluations != number_of_lines:
        # fixed: message formerly referenced undefined `evaluation_file`,
        # raising NameError instead of the intended assertion message
        assert False, ("number of evaluations %s and number of lines in"
                       " evaluations file %s: %s" % (
                           world.number_of_evaluations, evaluations_file,
                           number_of_lines))
    world.evaluation_ids = evaluation_ids
    for evaluation_id in evaluation_ids:
        try:
            check_resource(evaluation_id, world.api.get_evaluation)
            world.evaluations.append(evaluation_id)
        except Exception as exc:
            assert False, str(exc)
def i_check_create_models_in_ensembles(step, in_ensemble=True):
    """Waits until the models file has the expected number of lines and
    verifies each model; registers its ensemble when `in_ensemble` is set.
    """
    model_file = "%s%smodels" % (world.directory, os.sep)
    # fixed: define before the loop — the original only bound model_ids
    # inside the while body, so it was undefined when the counts matched
    # on entry and the loop never ran
    model_ids = []
    number_of_lines = 0
    count = 0
    while world.number_of_models != number_of_lines and count < 10:
        number_of_lines = 0
        model_ids = []
        for line in open(model_file, "r"):
            number_of_lines += 1
            model_ids.append(line.strip())
        if world.number_of_models != number_of_lines:
            time.sleep(10)
            count += 1
    if world.number_of_models != number_of_lines:
        assert False, ("number of models %s and number of lines in models"
                       " file %s: %s" % (
                           world.number_of_models, model_file,
                           number_of_lines))
    world.model_ids = model_ids
    for model_id in model_ids:
        try:
            model = check_resource(model_id, world.api.get_model)
            if in_ensemble:
                ensemble_id = "ensemble/%s" % model['object']['ensemble_id']
                if ensemble_id not in world.ensembles:
                    world.ensembles.append(ensemble_id)
            else:
                world.models.append(model_id)
        except Exception as exc:
            assert False, str(exc)
def i_check_create_models(step):
    """Waits until the models file has the expected number of lines and
    verifies each model id against the API.
    """
    model_file = "%s%smodels" % (world.directory, os.sep)
    model_ids = []
    number_of_lines = 0
    count = 0
    while world.number_of_models != number_of_lines and count < 10:
        number_of_lines = 0
        # fixed: reset per retry — the original kept appending on every
        # pass, so retries produced duplicated ids in world.model_ids
        model_ids = []
        for line in open(model_file, "r"):
            number_of_lines += 1
            model_ids.append(line.strip())
        if world.number_of_models != number_of_lines:
            time.sleep(10)
            count += 1
    if world.number_of_models != number_of_lines:
        assert False, ("number of models %s and number of lines in models"
                       " file %s: %s" % (
                           world.number_of_models, model_file,
                           number_of_lines))
    world.model_ids = model_ids
    for model_id in model_ids:
        try:
            check_resource(model_id, world.api.get_model)
            world.models.append(model_id)
        except Exception as exc:
            assert False, str(exc)
def __init__(self, ensemble, api=None, max_models=None):
    """Builds a local ensemble wrapper.

    `ensemble` is either an ensemble id/resource or a plain list of model
    ids. `max_models` caps how many models are grouped per split; when all
    models fit in one split a MultiModel is built eagerly.
    """
    if api is None:
        # default connection backed by the local storage directory
        self.api = BigML(storage=STORAGE)
    else:
        self.api = api
    self.ensemble_id = None
    if isinstance(ensemble, list):
        # user supplied an explicit list of models; no ensemble resource
        try:
            models = [get_model_id(model) for model in ensemble]
        except ValueError:
            raise ValueError('Failed to verify the list of models. Check '
                             'your model id values.')
        self.distributions = None
    else:
        # resolve the ensemble resource and take its model list
        self.ensemble_id = get_ensemble_id(ensemble)
        ensemble = check_resource(ensemble, self.api.get_ensemble)
        models = ensemble['object']['models']
        self.distributions = ensemble['object'].get('distributions', None)
    self.model_ids = models
    self.fields = self.all_model_fields()

    number_of_models = len(models)
    if max_models is None:
        self.models_splits = [models]
    else:
        # chunk model ids into groups of at most max_models
        self.models_splits = [models[index:(index + max_models)] for index
                              in range(0, number_of_models, max_models)]
    if len(self.models_splits) == 1:
        # single split: download every model now and build the MultiModel
        models = [retrieve_resource(self.api, model_id,
                                    query_string=ONLY_MODEL)
                  for model_id in self.models_splits[0]]
        self.multi_model = MultiModel(models, self.api)
def retrieve_resource(api, resource_id, query_string='limit=-1;',
                      no_check_fields=False):
    """ Retrieves resource info either from a local repo or
        from the remote server
    """
    if api.storage is not None:
        try:
            # local cache path: "storage/resource_type_resource_number"
            stored_resource = "%s%s%s" % (api.storage, os.sep,
                                          resource_id.replace("/", "_"))
            with open(stored_resource) as resource_file:
                resource = json.loads(resource_file.read())
            # we check that the stored resource has enough fields information
            # for local predictions to work. Otherwise we should retrieve it.
            if no_check_fields or check_model_fields(resource):
                return resource
        except ValueError:
            # fixed: the format string had no argument, so the error text
            # contained a literal "%s" instead of the offending path
            raise ValueError("The file %s contains no JSON" % stored_resource)
        except IOError:
            # no local copy: fall through to the remote download
            pass
    api_getter = api.getters[get_resource_type(resource_id)]
    resource = check_resource(resource_id, api_getter, query_string)
    return resource
def i_check_source_exists_by_id(step, source_id):
    """Fails unless the source id can still be retrieved from the API."""
    source = check_resource(source_id, world.api.get_source)
    assert source['code'] != HTTP_NOT_FOUND
    world.source = source
def i_check_dataset_exists_by_id(step, dataset_id):
    """Fails unless the dataset id can still be retrieved from the API."""
    dataset = check_resource(dataset_id, world.api.get_dataset)
    assert dataset['code'] != HTTP_NOT_FOUND
    world.dataset = dataset
def local_batch_predict(models, headers, test_reader, exclude, fields, resume,
                        output_path, max_models, number_of_tests, api, output,
                        verbosity, method, objective_field, session_file,
                        debug):
    """Get local predictions form partial Multimodel, combine and save to file

    Processes the models in splits of at most `max_models`, accumulating
    each split's votes, then combines the votes with `method` and writes
    one prediction per test row to `output`.
    """
    def draw_progress_bar(current, total):
        """Draws a text based progress report.
        """
        pct = 100 - ((total - current) * 100) / (total)
        console_log("Predicted on %s out of %s models [%s%%]" % (
            localize(current), localize(total), pct))
    models_total = len(models)
    # chunk the model list so that only max_models are in memory at once
    models_splits = [models[index:(index + max_models)]
                     for index in range(0, models_total, max_models)]
    input_data_list = []
    for row in test_reader:
        # drop excluded columns before pairing values with field names
        for index in exclude:
            del row[index]
        input_data_list.append(fields.pair(row, headers, objective_field))
    total_votes = []
    models_count = 0
    for models_split in models_splits:
        if resume:
            # when resuming, verify each model's predictions file is complete
            for model in models_split:
                pred_file = get_predictions_file_name(model, output_path)
                u.checkpoint(u.are_predictions_created, pred_file,
                             number_of_tests, debug=debug)
        complete_models = []
        for index in range(len(models_split)):
            complete_models.append(api.check_resource(
                models_split[index], api.get_model))
        local_model = MultiModel(complete_models)
        # reuse=True picks up predictions already stored in output_path
        local_model.batch_predict(input_data_list, output_path, reuse=True)
        votes = local_model.batch_votes(output_path)
        models_count += max_models
        if models_count > models_total:
            models_count = models_total
        if verbosity:
            draw_progress_bar(models_count, models_total)
        if total_votes:
            # merge this split's votes into the running per-row totals
            for index in range(0, len(votes)):
                predictions = total_votes[index].predictions
                predictions.extend(votes[index].predictions)
        else:
            total_votes = votes
    message = u.dated("Combining predictions.\n")
    u.log_message(message, log_file=session_file, console=verbosity)
    for multivote in total_votes:
        u.write_prediction(multivote.combine(method), output)
def reify_resources(args, api, logger):
    """ Extracts the properties of the created resources and generates code to
        rebuild them
    """
    resource_id = get_resource_id(args.resource_id)
    if resource_id is None:
        sys.exit("Failed to match a valid resource ID. Please, check: %s"
                 % args.resource_id)
    # check whether the resource exists
    try:
        check_resource(resource_id, raise_on_error=True, api=api)
    except Exception:
        sys.exit("Failed to find the resource %s. Please, check its ID and"
                 " the connection info (domain and credentials)." %
                 resource_id)
def i_check_create_fusion(step):
    """Reads the fusion id from its file and checks it against the API."""
    fusions_path = "%s%sfusions" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        # (the original leaked it in that case)
        with open(fusions_path, "r") as handler:
            fs = check_resource(handler.readline().strip(),
                                world.api.get_fusion)
        world.fusions.append(fs['resource'])
        world.fusion = fs
    except Exception as exc:
        assert False, str(exc)
def i_check_create_pca_model(step):
    """Reads the PCA id from its file and checks it against the API."""
    pcas_path = "%s%spcas" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(pcas_path, "r") as handler:
            pca = check_resource(handler.readline().strip(),
                                 world.api.get_pca)
        world.pcas.append(pca['resource'])
        world.pca = pca
    except Exception as exc:
        assert False, str(exc)
def upload_source(self, filename):
    """ Upload a sourcefile to BigML. Return resource value. """
    assert self.authenticated, 'Not authenticated!'

    # check if source file has already been uploaded
    query_string = 'name={}'.format(filename)
    matching_sources = self.api.list_sources(query_string)['objects']
    if matching_sources:
        source = matching_sources[0]
        self.logger.info(
            '{0} is already present in BigML'.format(basename(filename)))
    else:
        self.logger.info('uploading source to BigML...')
        source = self.api.create_source(filename, {'name': filename})

    # enter polling loop until source becomes ready
    check_resource(source['resource'], self.api.get_source)
    return source['resource']
def i_check_create_time_series(step):
    """Reads the time series id from its file and checks it in the API."""
    ts_path = "%s%stime_series" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(ts_path, "r") as handler:
            ts = check_resource(handler.readline().strip(),
                                world.api.get_time_series)
        world.time_series_set.append(ts['resource'])
        world.time_series = ts
    except Exception as exc:
        assert False, str(exc)
def i_check_create_source(step):
    """Reads the source id from its file and checks it against the API."""
    source_path = "%s%ssource" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(source_path, "r") as handler:
            source = check_resource(handler.readline().strip(),
                                    world.api.get_source)
        world.sources.append(source["resource"])
        world.source = source
    except Exception as exc:
        assert False, str(exc)
def i_check_create_ensemble(step):
    """Reads the ensemble id from its file and checks it against the API."""
    ensembles_path = "%s%sensembles" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(ensembles_path, "r") as handler:
            ensemble = check_resource(handler.readline().strip(),
                                      world.api.get_ensemble)
        world.ensembles.append(ensemble["resource"])
        world.ensemble = ensemble
    except Exception as exc:
        assert False, str(exc)
def i_check_create_project(step, organization=False):
    """Reads the project id from its file and checks it against the API."""
    project_path = "%s%sproject" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(project_path, "r") as handler:
            project = check_resource(handler.readline().strip(),
                                     world.api.get_project)
        world.projects.append(project['resource'])
        world.project = project
    except Exception as exc:
        assert False, str(exc)
def i_check_create_script(step):
    """Reads the script id from its file and checks it against the API."""
    scripts_path = "%s%sscripts" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(scripts_path, "r") as handler:
            script = check_resource(handler.readline().strip(),
                                    world.api.get_script)
        world.scripts.append(script['resource'])
        world.script = script
    except Exception as exc:
        assert False, str(exc)
def i_check_create_evaluation(step):
    """Reads the evaluation id from its file and checks it in the API."""
    evaluations_path = "%s%sevaluations" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(evaluations_path, "r") as handler:
            evaluation = check_resource(handler.readline().strip(),
                                        world.api.get_evaluation)
        world.evaluations.append(evaluation["resource"])
        world.evaluation = evaluation
    except Exception as exc:
        # fixed: the bare `except:` swallowed the failure reason; report
        # it like the sibling checker functions do
        assert False, str(exc)
def i_check_create_multi_dataset(step):
    """Reads the multidataset id from its file and checks it in the API."""
    multi_path = "%s%sdataset_multi" % (world.directory, os.sep)
    try:
        with open(multi_path, "r") as handler:
            multi_dataset_id = handler.readline().strip()
        dataset = check_resource(multi_dataset_id, world.api.get_dataset)
        world.datasets.append(dataset['resource'])
        world.dataset = dataset
    except Exception as exc:
        assert False, str(exc)
def i_check_create_library(step):
    """Reads the library id from its file and checks it against the API."""
    library_path = os.path.join(world.directory, "library")
    try:
        # context manager closes the handle even if check_resource raises
        with open(library_path, "r") as handler:
            library = check_resource(handler.readline().strip(),
                                     world.api.get_library)
        world.libraries.append(library['resource'])
        world.library = library
    except Exception as exc:
        assert False, str(exc)
def i_check_create_model(step):
    """Reads the model id from its file and checks it against the API."""
    models_path = "%s%smodels" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(models_path, "r") as handler:
            model = check_resource(handler.readline().strip(),
                                   world.api.get_model)
        world.models.append(model["resource"])
        world.model = model
    except Exception as exc:
        assert False, str(exc)
def i_check_create_batch_anomaly_score_dataset(step):
    """Reads the batch anomaly score dataset id and checks it in the API."""
    dataset_path = "%s%sbatch_anomaly_score_dataset" % (world.directory,
                                                        os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(dataset_path, "r") as handler:
            dataset = check_resource(handler.readline().strip(),
                                     api=world.api)
        world.datasets.append(dataset['resource'])
    except Exception as exc:
        assert False, str(exc)
def i_check_create_dn_model(step):
    """Reads the deepnet id from its file and checks it against the API."""
    deepnets_path = "%s%sdeepnets" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(deepnets_path, "r") as handler:
            dn = check_resource(handler.readline().strip(),
                                world.api.get_deepnet)
        world.deepnets.append(dn['resource'])
        world.deepnet = dn
    except Exception as exc:
        assert False, str(exc)
def i_check_external_connector(step):
    """Reads the external connector id and checks it against the API."""
    connector_path = "%s%sexternal_connector" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(connector_path, "r") as handler:
            connector = check_resource(handler.readline().strip(),
                                       world.api.get_external_connector)
        world.external_connectors.append(connector['resource'])
        world.external_connector = connector
    except Exception as exc:
        assert False, str(exc)
def i_check_create_dataset(step, dataset_type=None):
    """Reads the dataset id of the given type and checks it in the API."""
    dataset_path = "%s%sdataset_%s" % (world.directory, os.sep, dataset_type)
    try:
        # context manager closes the handle even if check_resource raises
        with open(dataset_path, "r") as handler:
            dataset = check_resource(handler.readline().strip(),
                                     world.api.get_dataset)
        world.datasets.append(dataset['resource'])
        world.dataset = dataset
    except Exception as exc:
        assert False, str(exc)
def i_check_create_cluster(step):
    """Reads the cluster id from its file and checks it against the API."""
    clusters_path = "%s%sclusters" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(clusters_path, "r") as handler:
            cluster = check_resource(handler.readline().strip(),
                                     world.api.get_cluster)
        world.clusters.append(cluster['resource'])
        world.cluster = cluster
    except Exception as exc:
        assert False, str(exc)
def i_check_create_lr_model(step):
    """Reads the linear regression id and checks it against the API."""
    lr_path = "%s%slinear_regressions" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(lr_path, "r") as handler:
            lr = check_resource(handler.readline().strip(),
                                world.api.get_linear_regression)
        world.linear_regressions.append(lr['resource'])
        world.linear_regression = lr
    except Exception as exc:
        assert False, str(exc)
def i_check_create_association(step):
    """Reads the association id from its file and checks it in the API."""
    associations_path = os.path.join(world.directory, "associations")
    try:
        # context manager closes the handle even if check_resource raises
        with open(associations_path, "r") as handler:
            association = check_resource(handler.readline().strip(),
                                         world.api.get_association)
        world.associations.append(association['resource'])
        world.association = association
    except Exception as exc:
        assert False, str(exc)
def i_check_create_test_dataset(step):
    """Reads the test dataset id from its file and checks it in the API."""
    test_dataset_path = "%s%sdataset_test" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(test_dataset_path, "r") as handler:
            test_dataset = check_resource(handler.readline().strip(),
                                          world.api.get_dataset)
        world.datasets.append(test_dataset['resource'])
        world.test_dataset = test_dataset
    except Exception as exc:
        assert False, str(exc)
def i_check_create_sample(step):
    """Reads the sample id from its file and checks it against the API."""
    samples_path = "%s%ssamples" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(samples_path, "r") as handler:
            sample = check_resource(handler.readline().strip(),
                                    world.api.get_sample)
        world.samples.append(sample['resource'])
        world.sample = sample
    except Exception as exc:
        assert False, str(exc)
def train_model(self, inputs, outputs, train):
    """Dumps the training rows to a local CSV and builds a BigML model.

    Returns [source, dataset, model, local_model] for the trained data.
    """
    # Create a file with the trained data; `with` guarantees the handle is
    # closed even if a row fails to serialize (original leaked it).
    with open("./data_train.csv", "w") as train_handler:
        for x0, y0 in zip(inputs[train], outputs[train]):
            y0 = np.array(y0)
            # append the output value as the last CSV column
            line = ",".join(np.insert(x0, len(x0), y0))
            train_handler.write(line + "\n")
    # Use the training file created previously to train a BigML model
    source = check_resource(
        self.api.create_source('./data_train.csv', {
            'term_analysis': {"enabled": False},
            'source_parser': {"locale": "en-US"}}),
        self.api.get_source)
    dataset = check_resource(self.api.create_dataset(source),
                             self.api.get_dataset)
    model = check_resource(self.api.create_model(dataset),
                           self.api.get_model)
    local_model = Model(model)
    return [source, dataset, model, local_model]
def i_check_create_anomaly(step):
    """Reads the anomaly id from its file and checks it against the API."""
    anomalies_path = "%s%sanomalies" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(anomalies_path, "r") as handler:
            anomaly = check_resource(handler.readline().strip(),
                                     api=world.api)
        world.anomalies.append(anomaly['resource'])
        world.anomaly = anomaly
    except Exception as exc:
        assert False, str(exc)
def i_check_create_test_source(step):
    """Reads the test source id from its file and checks it in the API."""
    test_source_path = "%s%ssource_test" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(test_source_path, "r") as handler:
            test_source = check_resource(handler.readline().strip(),
                                         world.api.get_source)
        world.sources.append(test_source['resource'])
        world.test_source = test_source
    except Exception as exc:
        assert False, str(exc)
def i_check_create_package_script(step, package_dir=None):
    """Reads a package script id from its file and checks it in the API."""
    scripts_path = os.path.join(world.directory,
                                os.path.basename(package_dir), "scripts")
    try:
        # context manager closes the handle even if check_resource raises
        with open(scripts_path, "r") as handler:
            script = check_resource(handler.readline().strip(),
                                    world.api.get_script)
        world.scripts.append(script['resource'])
        world.script = script
    except Exception as exc:
        assert False, str(exc)
def i_check_create_topic_model(step):
    """Reads the topic model id from its file and checks it in the API."""
    topic_models_path = "%s%stopic_models" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(topic_models_path, "r") as handler:
            topic_model = check_resource(handler.readline().strip(),
                                         world.api.get_topic_model)
        world.topic_models.append(topic_model['resource'])
        world.topic_model = topic_model
    except Exception as exc:
        assert False, str(exc)
def i_check_create_batch_prediction(step):
    """Reads the batch prediction id from its file and checks it in the
    API."""
    batch_prediction_path = "%s%sbatch_prediction" % (world.directory, os.sep)
    try:
        # context manager closes the handle even if check_resource raises
        with open(batch_prediction_path, "r") as handler:
            batch_prediction = check_resource(handler.readline().strip(),
                                              world.api.get_batch_prediction)
        world.batch_predictions.append(batch_prediction['resource'])
        world.batch_prediction = batch_prediction
    except Exception as exc:
        assert False, str(exc)