Пример #1
0
def train():
    """Fit and persist the feature vectorizer and the label encoder.

    Steps performed:

    1. Load and print the hyperparameter and input-data configuration, list
       the files found in every configured channel, and print the resource
       configuration when that file exists.
    2. Collect every file of the ``train`` channel and load them through
       ``load_raw`` (restricted to ``label_column`` and ``feature_column``).
    3. Fit a ``CountVectorizer(analyzer=set)`` on ``feature_column`` and a
       ``LabelEncoder`` on ``label_column``.
    4. Dump both objects into ``model_artifacts_path`` as ``model.joblib``
       and ``label.joblib``.

    Raises:
        SystemExit: with status 1 after any ``Exception``; the message is
            first written to the failure file and echoed to stderr.
    """
    try:
        print("starting training...")
        hyperparameters = load_json_object(hyperparameters_file_path)
        print("\nHyperparameters configuration:")
        print_json_object(hyperparameters)

        input_data_config = load_json_object(inputdataconfig_file_path)
        print("\nInput data configuration:")
        print_json_object(input_data_config)

        for key in input_data_config:
            print("\nList of files in {0} channel: ".format(key))
            channel_path = data_files_path + key + "/"
            print_files_in_path(channel_path)

        if os.path.exists(resource_file_path):
            resource_config = load_json_object(resource_file_path)
            print("\nResource configuration:")
            print_json_object(resource_config)

        # Take the set of files and read them all into a single pandas dataframe
        train_dir = data_files_path + "train/"
        input_files = [
            os.path.join(train_dir, file_name)
            for file_name in os.listdir(train_dir)
        ]
        if not input_files:
            raise ValueError((
                "There are no files in {}.\n" +
                "This usually indicates that the channel ({}) was incorrectly specified,\n"
                +
                "the data specification in S3 was incorrectly specified or the role specified\n"
                + "does not have permission to access the data.").format(
                    train_dir, "train"))

        concat_data = load_raw(input_files, [label_column, feature_column])

        # DataFrame.info() prints its summary itself and returns None, so it
        # must not be wrapped in print() (that would add a stray "None").
        # NOTE(review): assumes load_raw returns a pandas DataFrame - confirm.
        concat_data.info()

        # analyzer=set turns every document into the set of its elements.
        preprocessor = CountVectorizer(analyzer=set)
        print("fitting...")
        preprocessor.fit(concat_data[feature_column])
        print("finished fitting...")

        # get_feature_names() was removed in scikit-learn 1.2; prefer
        # get_feature_names_out() and fall back only on older releases.
        if hasattr(preprocessor, "get_feature_names_out"):
            feature_column_names = list(preprocessor.get_feature_names_out())
        else:
            feature_column_names = preprocessor.get_feature_names()
        print(feature_column_names)

        le = LabelEncoder()
        le.fit(concat_data[label_column])
        print("le classes: ", le.classes_)

        dump(preprocessor, os.path.join(model_artifacts_path, "model.joblib"))
        dump(le, os.path.join(model_artifacts_path, "label.joblib"))

        print("saved model!")
    except Exception as e:
        # Record the failure for the caller of the job, then exit non-zero.
        write_failure_file(failure_file_path, str(e))
        print(e, file=sys.stderr)
        sys.exit(1)
Пример #2
0
def train():
    """Run the placeholder training job and save its model artifacts.

    Prints whichever configuration files exist (hyperparameters, input data
    configuration with the files of each channel, resource configuration)
    and the training job name / ARN when the corresponding environment
    variables are set. It then runs a single ten-second dummy epoch and
    saves the model artifacts.

    If the exit-signal handler reports a pending exit after an epoch, the
    artifacts are saved immediately and the process exits with status 0.
    Any ``Exception`` is written to the failure file and to stderr, and the
    process exits with status 1.
    """
    try:
        print("\nRunning training...")

        if os.path.exists(hyperparameters_file_path):
            hyperparameter_values = load_json_object(hyperparameters_file_path)
            print('\nHyperparameters configuration:')
            print_json_object(hyperparameter_values)

        if os.path.exists(inputdataconfig_file_path):
            channels = load_json_object(inputdataconfig_file_path)
            print('\nInput data configuration:')
            print_json_object(channels)

            # One directory per channel lives directly under data_files_path.
            for channel in channels:
                print('\nList of files in {0} channel: '.format(channel))
                print_files_in_path(data_files_path + channel + '/')

        if os.path.exists(resource_file_path):
            resources = load_json_object(resource_file_path)
            print('\nResource configuration:')
            print_json_object(resources)

        job_name = os.environ.get(training_job_name_env)
        if job_name is not None:
            print("\nTraining job name: ")
            print(job_name)

        job_arn = os.environ.get(training_job_arn_env)
        if job_arn is not None:
            print("\nTraining job ARN: ")
            print(job_arn)

        # Tracks whether an exit signal arrived; the message printed below
        # names SIGTERM/SIGINT.
        stop_watcher = ExitSignalHandler()

        # Placeholder model: nothing is actually trained here.
        model = None

        total_epochs = 1
        for epoch in range(total_epochs):
            print("\nRunning epoch {0}...".format(epoch))

            # Stand-in for the real work of an epoch.
            time.sleep(10)

            if not stop_watcher.exit_now:
                print("Completed epoch {0}.".format(epoch))
                continue

            # An exit was requested: persist what we have and stop cleanly.
            print(
                "Received SIGTERM/SIGINT. Saving training state and exiting."
            )
            save_model_artifacts(model_artifacts_path, model)
            sys.exit(0)

        # Normal completion: persist the final model artifacts.
        save_model_artifacts(model_artifacts_path, model)

        print("\nTraining completed!")
    except Exception as error:
        write_failure_file(failure_file_path, str(error))
        print(error, file=sys.stderr)
        sys.exit(1)