Exemplo n.º 1
0
def export_dataset(dataset, api, args, resume,
                   session_file=None, path=None):
    """Downloads the dataset as a CSV file.

    The destination is whatever `csv_name` resolves from `args.to_csv`,
    `path` and `dataset`. When `resume` is set, the checkpoint is asked
    first whether that file was already exported; if so nothing is
    downloaded.

    Returns the (possibly updated) `resume` flag. Exits the program when
    the download call returns None.
    """
    csv_path = csv_name(args.to_csv, path, dataset)

    if resume:
        # NOTE(review): `remote_predict` in this file unpacks the result of
        # `c.checkpoint` as a 2-tuple, while here it is used directly as the
        # flag -- confirm what `c.is_dataset_exported` returns.
        resume = c.checkpoint(
            c.is_dataset_exported, csv_path,
            debug=args.debug)
        if resume:
            # The CSV is already there: nothing to log or download.
            return resume
        notice = u.dated("No dataset exported. Resuming.\n")
    else:
        notice = u.dated("Exporting dataset to CSV file: %s\n" % csv_path)
    u.log_message(notice, log_file=session_file,
                  console=args.verbosity)

    # Reaching this point means the export still has to be done.
    if api.download_dataset(dataset, filename=csv_path) is None:
        sys.exit("Failed downloading CSV.")
    return resume
Exemplo n.º 2
0
def export_dataset(dataset, api, args, resume, session_file=None, path=None):
    """Exports the dataset to a CSV file given by the user or a filename
       based on the dataset id by default.

    The destination file name is resolved by `csv_name` from `args.to_csv`,
    `path` and `dataset`. When `resume` is set, the checkpoint is queried
    to find out whether the file was already exported, and the download is
    skipped in that case.

    Returns the (possibly updated) `resume` flag. Exits the program when
    the download call returns None, instead of silently reporting success.
    """
    # Local import so that the block does not depend on a module-level
    # `import sys` being present.
    import sys

    filename = csv_name(args.to_csv, path, dataset)
    if resume:
        resume = c.checkpoint(c.is_dataset_exported,
                              filename,
                              debug=args.debug)
        if not resume:
            message = u.dated("No dataset exported. Resuming.\n")
            u.log_message(message,
                          log_file=session_file,
                          console=args.verbosity)
    else:
        message = u.dated("Exporting dataset to CSV file: %s\n" % filename)
        u.log_message(message, log_file=session_file, console=args.verbosity)

    if not resume:
        file_name = api.download_dataset(dataset, filename=filename)
        # A None result is treated as a failed download, as the other
        # download calls in this file do.
        if file_name is None:
            sys.exit("Failed downloading CSV.")
    return resume
Exemplo n.º 3
0
def remote_predict(model,
                   test_dataset,
                   batch_prediction_args,
                   args,
                   api,
                   resume,
                   prediction_file=None,
                   session_file=None,
                   path=None,
                   log=None):
    """Computes predictions remotely using the batch predictions call.

    Two modes are selected by `args.dataset_off`:

    - unset: a single batch prediction is created for `test_dataset`,
      using `args.ensemble` when it is not None or else the model id
      extracted from `model`.
    - set: one batch prediction is created per entry of
      `args.test_dataset_ids`, paired by position with
      `args.ensemble_ids_` (when present and non-empty) or
      `args.model_ids_`.

    Unless `args.no_csv` is set, the predictions are downloaded to
    `prediction_file`. When `args.to_dataset` is set, the output dataset
    of each batch prediction is logged; in `dataset_off` mode a new
    dataset is additionally created from all of them with
    `api.create_dataset`, and that is the one downloaded.

    `session_file` and `log` receive log messages and `path` is where
    created resources are recorded. When `resume` is set, the checkpoint
    is queried for an existing batch prediction before creating one.

    Returns None. Exits the program if a download returns None or if
    waiting for the combined dataset raises ValueError.
    """
    if args.ensemble is not None and not args.dataset_off:
        model_or_ensemble = args.ensemble
    elif args.dataset_off:
        if hasattr(args, "ensemble_ids_") and args.ensemble_ids_:
            models = args.ensemble_ids_
        else:
            models = args.model_ids_
        test_datasets = args.test_dataset_ids
    else:
        model_or_ensemble = bigml.api.get_model_id(model)
    # if resuming, try to extract the batch prediction from log files
    if resume:
        message = u.dated("Batch prediction not found. Resuming.\n")
        resume, batch_prediction = c.checkpoint(c.is_batch_prediction_created,
                                                path,
                                                debug=args.debug,
                                                message=message,
                                                log_file=session_file,
                                                console=args.verbosity)
    if not resume:
        if not args.dataset_off:
            batch_prediction = create_batch_prediction(
                model_or_ensemble,
                test_dataset,
                batch_prediction_args,
                args,
                api,
                session_file=session_file,
                path=path,
                log=log)
        else:
            batch_predictions = []
            for index, test_dataset_n in enumerate(test_datasets):
                batch_predictions.append(
                    create_batch_prediction(models[index],
                                            test_dataset_n,
                                            batch_prediction_args,
                                            args,
                                            api,
                                            session_file=session_file,
                                            path=path,
                                            log=log))
    if not args.no_csv and not args.dataset_off:
        file_name = api.download_batch_prediction(batch_prediction,
                                                  prediction_file)
        if file_name is None:
            sys.exit("Failed downloading CSV.")
    if args.to_dataset and not args.dataset_off:
        batch_prediction = bigml.api.check_resource(batch_prediction, api=api)
        new_dataset = bigml.api.get_dataset_id(
            batch_prediction['object']['output_dataset_resource'])
        if new_dataset is not None:
            message = u.dated("Batch prediction dataset created: %s\n" %
                              u.get_url(new_dataset))
            u.log_message(message,
                          log_file=session_file,
                          console=args.verbosity)
            u.log_created_resources("batch_prediction_dataset",
                                    path,
                                    new_dataset,
                                    mode='a')
    elif args.to_dataset and args.dataset_off:
        # NOTE(review): `batch_predictions` is only assigned in the
        # `not resume` branch above, so resuming from a checkpoint in
        # `dataset_off` mode reaches this loop with the name unbound.
        predictions_datasets = []
        for batch_prediction in batch_predictions:
            batch_prediction = bigml.api.check_resource(batch_prediction,
                                                        api=api)
            new_dataset = bigml.api.get_dataset_id(
                batch_prediction['object']['output_dataset_resource'])
            if new_dataset is not None:
                predictions_datasets.append(new_dataset)
                message = u.dated("Batch prediction dataset created: %s\n" %
                                  u.get_url(new_dataset))
                u.log_message(message,
                              log_file=session_file,
                              console=args.verbosity)
                u.log_created_resources("batch_prediction_dataset",
                                        path,
                                        new_dataset,
                                        mode='a')
        multi_dataset = api.create_dataset(predictions_datasets)
        # The logging helpers are called through `u`, like everywhere else
        # in this function.
        u.log_created_resources("dataset_pred",
                                path,
                                bigml.api.get_dataset_id(multi_dataset),
                                mode='a')
        # NOTE(review): `check_resource_error` is kept unqualified as in the
        # original; confirm it is imported at module level.
        dataset_id = check_resource_error(multi_dataset,
                                          "Failed to create dataset: ")
        try:
            multi_dataset = api.check_resource(multi_dataset)
        except ValueError as exception:
            sys.exit("Failed to get a finished dataset: %s" % str(exception))
        message = u.dated("Predictions dataset created: %s\n" %
                          u.get_url(multi_dataset))
        u.log_message(message, log_file=session_file, console=args.verbosity)
        u.log_message("%s\n" % dataset_id, log_file=log)
        if not args.no_csv:
            file_name = api.download_dataset(dataset_id, prediction_file)
            if file_name is None:
                sys.exit("Failed downloading CSV.")
Exemplo n.º 4
0
def remote_predict(model, test_dataset, batch_prediction_args, args,
                   api, resume, prediction_file=None, session_file=None,
                   path=None, log=None):
    """Computes predictions remotely using the batch predictions call.

    With `args.dataset_off` unset, a single batch prediction is created
    for `test_dataset` using `args.ensemble` (when not None) or the model
    id extracted from `model`. With `args.dataset_off` set, one batch
    prediction is created per entry of `args.test_dataset_ids`, paired by
    position with `args.ensemble_ids_` (when present and non-empty) or
    `args.model_ids_`.

    Unless `args.no_csv` is set, the predictions are downloaded to
    `prediction_file`. When `args.to_dataset` is set, the output dataset
    of each batch prediction is logged; in `dataset_off` mode a new
    dataset is also created from all of them with `api.create_dataset`
    and that dataset is the one downloaded.

    `session_file` and `log` receive log messages and `path` is where
    created resources are recorded. When `resume` is set, the checkpoint
    is queried for an existing batch prediction before creating one.

    Returns None. Exits the program if a download returns None or if
    waiting for the combined dataset raises ValueError.
    """
    if args.ensemble is not None and not args.dataset_off:
        model_or_ensemble = args.ensemble
    elif args.dataset_off:
        if hasattr(args, "ensemble_ids_") and args.ensemble_ids_:
            models = args.ensemble_ids_
        else:
            models = args.model_ids_
        test_datasets = args.test_dataset_ids
    else:
        model_or_ensemble = bigml.api.get_model_id(model)
    # if resuming, try to extract the batch prediction from log files
    if resume:
        message = u.dated("Batch prediction not found. Resuming.\n")
        resume, batch_prediction = c.checkpoint(
            c.is_batch_prediction_created, path, debug=args.debug,
            message=message, log_file=session_file, console=args.verbosity)
    if not resume:
        if not args.dataset_off:
            batch_prediction = create_batch_prediction(
                model_or_ensemble, test_dataset, batch_prediction_args,
                args, api, session_file=session_file, path=path, log=log)
        else:
            batch_predictions = []
            for index, test_dataset_n in enumerate(test_datasets):
                batch_predictions.append(create_batch_prediction(
                    models[index], test_dataset_n, batch_prediction_args,
                    args, api, session_file=session_file, path=path, log=log))
    if not args.no_csv and not args.dataset_off:
        file_name = api.download_batch_prediction(batch_prediction,
                                                  prediction_file)
        if file_name is None:
            sys.exit("Failed downloading CSV.")
    if args.to_dataset and not args.dataset_off:
        batch_prediction = bigml.api.check_resource(batch_prediction, api=api)
        new_dataset = bigml.api.get_dataset_id(
            batch_prediction['object']['output_dataset_resource'])
        if new_dataset is not None:
            message = u.dated("Batch prediction dataset created: %s\n"
                              % u.get_url(new_dataset))
            u.log_message(message, log_file=session_file,
                          console=args.verbosity)
            u.log_created_resources("batch_prediction_dataset",
                                    path, new_dataset, mode='a')
    elif args.to_dataset and args.dataset_off:
        # NOTE(review): `batch_predictions` is only assigned in the
        # `not resume` branch above, so resuming from a checkpoint in
        # `dataset_off` mode reaches this loop with the name unbound.
        predictions_datasets = []
        for batch_prediction in batch_predictions:
            batch_prediction = bigml.api.check_resource(batch_prediction,
                                                        api=api)
            new_dataset = bigml.api.get_dataset_id(
                batch_prediction['object']['output_dataset_resource'])
            if new_dataset is not None:
                predictions_datasets.append(new_dataset)
                message = u.dated("Batch prediction dataset created: %s\n"
                                  % u.get_url(new_dataset))
                u.log_message(message, log_file=session_file,
                              console=args.verbosity)
                u.log_created_resources("batch_prediction_dataset",
                                        path, new_dataset, mode='a')
        multi_dataset = api.create_dataset(predictions_datasets)
        # The logging helpers are called through `u`, like everywhere else
        # in this function.
        u.log_created_resources("dataset_pred", path,
                                bigml.api.get_dataset_id(multi_dataset),
                                mode='a')
        # NOTE(review): `check_resource_error` is kept unqualified as in the
        # original; confirm it is imported at module level.
        dataset_id = check_resource_error(multi_dataset,
                                          "Failed to create dataset: ")
        try:
            multi_dataset = api.check_resource(multi_dataset)
        except ValueError as exception:
            sys.exit("Failed to get a finished dataset: %s" % str(exception))
        message = u.dated("Predictions dataset created: %s\n" %
                          u.get_url(multi_dataset))
        u.log_message(message, log_file=session_file, console=args.verbosity)
        u.log_message("%s\n" % dataset_id, log_file=log)
        if not args.no_csv:
            file_name = api.download_dataset(dataset_id, prediction_file)
            if file_name is None:
                sys.exit("Failed downloading CSV.")