示例#1
0
def main(args):
    """Module entry point: fit a PCA transform and persist outputs.

    Args:
        args: parsed command-line namespace; must provide ``input_dir``,
            ``output_dir``, ``model_output_dir`` and the PCA parameters
            consumed by ``PCAModule``.
    """
    # Echo the directory arguments for traceability.
    logger.debug(f'input-dir {args.input_dir}')
    logger.debug(f'output-dir {args.output_dir}')
    logger.debug(f'model output dir {args.model_output_dir}')

    input_df = load_data_frame_from_directory(args.input_dir).data
    logger.debug(f'{input_df.describe()}\n shape{input_df.shape} ')

    # Build the PCA wrapper from the user parameters, then fit and
    # transform in a single pass.
    pca = PCAModule(args)
    logger.debug(pca.pca_instance)
    output_df = pca.fit_transform(input_df)
    pca.log_metrics(input_df.columns)

    logger.debug(f'output shape {output_df.shape}')

    # Persist the transformed frame together with its schema, then the
    # fitted model itself.
    output_schema = DataFrameSchema.data_frame_to_dict(output_df)
    save_data_frame_to_directory(save_to=args.output_dir,
                                 data=output_df,
                                 schema=output_schema)
    save_model_to_directory(save_to=args.model_output_dir,
                            model_dumper=pca_module_dumper(data=pca))
示例#2
0
    def run(self,
            training_dataset_of_user_item_rating_triples: TransactionDataset,
            user_features: FeatureDataset,
            item_features: FeatureDataset,
            epochs: int,
            batch_size: int,
            wide_part_optimizer: OptimizerSelection,
            wide_optimizer_learning_rate: float,
            crossed_feature_dimension: int,
            deep_part_optimizer: OptimizerSelection,
            deep_optimizer_learning_rate: float,
            user_embedding_dimension: int,
            item_embedding_dimension: int,
            categorical_features_embedding_dimension: int,
            hidden_units: IntTuple,
            activation_function: ActivationFnSelection,
            dropout: float,
            batch_normalization: Boolean,
            trained_wide_and_deep_recommendation_model: str,
            mpi_support: bool = True):
        """Train a Wide & Deep recommendation model and save it to disk.

        Args:
            training_dataset_of_user_item_rating_triples: training
                transactions (user/item/rating triples).
            user_features: optional per-user side features.
            item_features: optional per-item side features.
            trained_wide_and_deep_recommendation_model: output directory path
                for the trained model (the parameter name is fixed by the
                module spec).
            mpi_support: when True the model may train under MPI/Horovod;
                only rank 0 writes the saved model.
        """
        transactions = training_dataset_of_user_item_rating_triples

        # Name, validate, and preprocess the inputs before model construction.
        self.set_inputs_name(transactions, user_features=user_features,
                             item_features=item_features)
        self._validate_datasets(transactions, user_features=user_features,
                                item_features=item_features)
        self._preprocess(transactions, user_features=user_features,
                         item_features=item_features)

        # Map the user-facing parameter names onto the internal
        # hyper-parameter names expected by the model.
        hyper_params = WideNDeepModelHyperParams(
            epochs=epochs,
            batch_size=batch_size,
            wide_optimizer=wide_part_optimizer,
            wide_lr=wide_optimizer_learning_rate,
            deep_optimizer=deep_part_optimizer,
            deep_lr=deep_optimizer_learning_rate,
            hidden_units=hidden_units,
            activation_fn=activation_function,
            dropout=dropout,
            batch_norm=batch_normalization,
            crossed_dim=crossed_feature_dimension,
            user_dim=user_embedding_dimension,
            item_dim=item_embedding_dimension,
            embed_dim=categorical_features_embedding_dimension)

        user_feature_builder = FeatureBuilder(
            ids=transactions.users,
            id_key="User",
            features=user_features, feat_key_suffix='user_feature')
        item_feature_builder = FeatureBuilder(
            ids=transactions.items,
            id_key="Item",
            features=item_features, feat_key_suffix='item_feature')

        model = WideNDeepModel(hyper_params=hyper_params, save_dir=None,
                               user_feature_builder=user_feature_builder,
                               item_feature_builder=item_feature_builder,
                               mpi_support=mpi_support)
        model.train(transactions=transactions)

        # trained_wide_and_deep_recommendation_model is the trained-model
        # output path; the variable name is defined according to the module
        # spec. Under MPI only rank 0 writes, so the model is saved once.
        if model.hvd_rank == 0 or not model.mpi_support:
            save_model_to_directory(
                save_to=trained_wide_and_deep_recommendation_model,
                model=model)
示例#3
0
def entrance(save_model_path='/mnt/chjinche/test_data/detection/init_model',
             model_type='fasterrcnn_resnet50_fpn',
             pretrained=True):
    """Dump an untrained Faster R-CNN model configuration to disk.

    Args:
        save_model_path: directory the pickled config is written to.
        model_type: torchvision detection backbone identifier.
        pretrained: whether the model should later load pretrained weights.
    """
    model_config = dict(
        model_class='FasterRCNN',
        model_type=model_type,
        pretrained=pretrained,
    )
    logger.info('Dump untrained model.')
    logger.info(f'Model config: {model_config}.')
    # Pickle the config dict and hand it to the model-directory writer.
    save_model_to_directory(save_model_path,
                            pickle_dumper(model_config, 'model_config.pkl'))
    logger.info('Finished.')
示例#4
0
def entrance(
        save_model_path='/mnt/chjinche/projects/saved_untrained_model_resnet',
        model_type='resnext101_32x8d',
        pretrained=True):
    """Dump an untrained ResNet-family model configuration to disk.

    Args:
        save_model_path: directory the pickled config is written to.
        model_type: torchvision ResNet variant identifier.
        pretrained: whether the model should later load pretrained weights.
    """
    model_config = dict(
        model_class='ResNet',
        model_type=model_type,
        pretrained=pretrained,
    )
    logger.info('Dump untrained model.')
    logger.info(f'Model config: {model_config}.')
    # Pickle the config dict and hand it to the model-directory writer.
    save_model_to_directory(save_model_path,
                            pickle_dumper(model_config, 'model_config.pkl'))
    logger.info('Finished.')
def entrance(save_model_path='../init_model',
             model_type='densenet201',
             pretrained=True,
             memory_efficient=False):
    """Dump an untrained DenseNet model configuration to disk.

    Args:
        save_model_path: directory the pickled config is written to.
        model_type: torchvision DenseNet variant identifier.
        pretrained: whether the model should later load pretrained weights.
        memory_efficient: DenseNet checkpointing flag, stored in the config.
    """
    model_config = dict(
        model_class='DenseNet',
        model_type=model_type,
        pretrained=pretrained,
        memory_efficient=memory_efficient,
    )
    logger.info('Dump untrained model.')
    logger.info(f'Model config: {model_config}.')
    # Pickle the config dict and hand it to the model-directory writer.
    save_model_to_directory(save_model_path,
                            pickle_dumper(model_config, 'model_config.pkl'))
    logger.info('Finished.')
def main(args=None):
    """Module entry point: compute SGT sequence embeddings and persist them.

    Args:
        args: parsed command-line namespace; must provide ``input_dir``,
            ``sequence_column``, ``identifier_column``, ``length_sensitive``,
            ``kappa``, ``output_dir`` and ``model_output_dir``.

    Raises:
        ValueError: if ``args`` is not supplied.

    Exits with status 1 when the sequence column contains missing values.
    """
    # Fail fast with a clear message; the original default of None would
    # otherwise crash below with an opaque AttributeError.
    if args is None:
        raise ValueError('args must be provided')

    seq_col = args.sequence_column
    id_col = args.identifier_column
    length_sensitive = args.length_sensitive
    kappa = args.kappa

    logger.debug(f'input-dir {args.input_dir}')
    logger.debug(f'sequence-column {seq_col}')
    logger.debug(f'identifier-column {id_col}')
    logger.debug(f'length-sensitive {length_sensitive}')
    # Use the local binding for consistency with the other debug lines.
    logger.debug(f'kappa {kappa}')
    logger.debug(f'output-dir {args.output_dir}')
    logger.debug(f'model output dir {args.model_output_dir}')

    input_df = load_data_frame_from_directory(args.input_dir).data

    # Series.isnull().sum() already yields the scalar null count; the
    # second .sum() in the original was redundant.
    if input_df[seq_col].isnull().sum() > 0:
        logger.debug(f'column {seq_col} contains missing values ')
        sys.exit(1)

    embedding_df, sgt = compute_embeddings(input_df, seq_col, kappa,
                                           length_sensitive, id_col)

    logger.debug(f'embedding shape {embedding_df.shape}')

    save_data_frame_to_directory(
        save_to=args.output_dir,
        data=embedding_df,
        schema=DataFrameSchema.data_frame_to_dict(embedding_df))

    save_model_to_directory(save_to=args.model_output_dir,
                            model_dumper=sgt_dumper(data=sgt))
示例#7
0
                        help="A string parameter.")
    parser.add_argument("--normalize", type=str)
    parser.add_argument("--time-decay", type=str)

    args, _ = parser.parse_known_args()

    input_df = load_data_frame_from_directory(args.input_path).data
    input_df[args.col_rating] = input_df[args.col_rating].astype(float)

    logger.debug(f"Shape of loaded DataFrame: {input_df.shape}")
    logger.debug(f"Cols of DataFrame: {input_df.columns}")

    model = SAR(
        col_user=args.col_user,
        col_item=args.col_item,
        col_rating=args.col_rating,
        col_timestamp=args.col_timestamp,
        normalize=strtobool(args.normalize),
        timedecay_formula=strtobool(args.time_decay),
    )

    start_time = time.time()

    model.fit(input_df)

    train_time = time.time() - start_time
    print("Took {} seconds for training.".format(train_time))

    save_model_to_directory(save_to=args.output_model,
                            model_dumper=joblib_dumper(data=model))