def test_experiment_sequence_combiner(csv_filename):
    """Run the experiment with a ``sequence_concat`` combiner.

    Two aligned sequence inputs plus one categorical input feed a single
    categorical output; each run uses a different sequence encoder
    (all of ``ENCODERS`` except the last two).

    :param csv_filename: path where the synthetic test CSV is written.
    """
    input_features = [
        sequence_feature(
            name='english',
            min_len=5,
            max_len=5,
            encoder='rnn',
            cell_type='lstm',
            reduce_output=None
        ),
        sequence_feature(
            name='spanish',
            min_len=5,
            max_len=5,
            encoder='rnn',
            cell_type='lstm',
            reduce_output=None
        ),
        categorical_feature(vocab_size=5)
    ]
    output_features = [
        categorical_feature(reduce_input='sum', vocab_size=5)
    ]
    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'training': {
            'epochs': 2
        },
        'combiner': {
            'type': 'sequence_concat',
            'encoder': 'rnn',
            'main_sequence_feature': 'random_sequence'
        }
    }

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    for encoder in ENCODERS[:-2]:
        # Progress output, not an error condition: use info level
        # (the original logged this at error level by mistake).
        logger.info('sequence combiner. encoders: {0}, {1}'.format(
            encoder,
            encoder
        ))
        # input_features is the same list object already referenced by
        # model_definition, so mutating it in place is sufficient; the
        # previous re-assignment of model_definition['input_features']
        # was a no-op and has been dropped.
        input_features[0]['encoder'] = encoder
        input_features[1]['encoder'] = encoder

        exp_dir_name = experiment(
            model_definition,
            skip_save_processed_input=False,
            skip_save_progress=True,
            skip_save_unprocessed_output=True,
            data_csv=rel_path
        )
        shutil.rmtree(exp_dir_name, ignore_errors=True)
def test_experiment_multi_input_intent_classification(csv_filename):
    """Intent-classification setup: multiple inputs, one category output.

    A sparse text input and a categorical input (with sampled-softmax
    loss) predict a single category; the run is repeated once per
    available text encoder.

    :param csv_filename: path where the synthetic test CSV is written.
    """
    input_features = [
        text_feature(vocab_size=10, min_len=1, representation='sparse'),
        categorical_feature(
            vocab_size=10,
            loss='sampled_softmax_cross_entropy'
        )
    ]
    output_features = [categorical_feature(vocab_size=2, reduce_input='sum')]

    # Synthesize a CSV matching the declared feature set.
    data_path = generate_data(input_features, output_features, csv_filename)

    for text_encoder in ENCODERS:
        input_features[0]['encoder'] = text_encoder
        run_experiment(input_features, output_features, data_csv=data_path)
def test_experiment_model_resume(csv_filename):
    """Train, resume training, and predict from a saved model.

    Single sequence input, single category output. Verifies that a
    model saved by ``experiment`` can be resumed via
    ``model_resume_path`` and then loaded again by ``full_predict``.

    :param csv_filename: path where the synthetic test CSV is written.
    """
    input_features = [sequence_feature(encoder='rnn', reduce_output='sum')]
    output_features = [categorical_feature(vocab_size=2, reduce_input='sum')]

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {
            'type': 'concat',
            'fc_size': 14
        },
        'training': {
            'epochs': 2
        }
    }

    exp_dir_name = experiment(model_definition, data_csv=rel_path)
    # Use the module-level logger for consistency with the rest of this
    # file instead of the root logger via logging.info.
    logger.info('Experiment Directory: {0}'.format(exp_dir_name))

    # Second run resumes from the directory produced by the first run.
    experiment(
        model_definition,
        data_csv=rel_path,
        model_resume_path=exp_dir_name
    )

    full_predict(os.path.join(exp_dir_name, 'model'), data_csv=rel_path)
    shutil.rmtree(exp_dir_name, ignore_errors=True)
def test_experiment_tied_weights(csv_filename):
    """Two text inputs with tied encoder weights, one category output.

    The second text feature ties its weights to the first via
    ``tied_weights``; the experiment is repeated once per encoder type,
    with both features always sharing the same encoder.

    :param csv_filename: path where the synthetic test CSV is written.
    """
    input_features = [
        text_feature(
            name='text_feature1',
            min_len=1,
            encoder='cnnrnn',
            reduce_output='sum'
        ),
        text_feature(
            name='text_feature2',
            min_len=1,
            encoder='cnnrnn',
            reduce_output='sum',
            tied_weights='text_feature1'
        )
    ]
    output_features = [categorical_feature(vocab_size=2, reduce_input='sum')]

    # Synthesize a CSV matching the declared feature set.
    data_path = generate_data(input_features, output_features, csv_filename)

    for enc in ENCODERS:
        # Tied features must use identical encoders.
        for feature in input_features:
            feature['encoder'] = enc
        run_experiment(input_features, output_features, data_csv=data_path)
def test_api_intent_classification(csv_filename):
    """Drive the programmatic API for a simple intent classifier.

    Single sequence input, single category output; one API experiment
    per available encoder.

    :param csv_filename: path where the synthetic test CSV is written.
    """
    input_features = [sequence_feature(reduce_output='sum')]
    output_features = [categorical_feature(vocab_size=2, reduce_input='sum')]

    # Synthesize a CSV matching the declared feature set.
    data_path = generate_data(input_features, output_features, csv_filename)

    for enc in ENCODERS:
        input_features[0]['encoder'] = enc
        run_api_experiment(input_features, output_features, data_csv=data_path)
def test_experiment_datetime_feature(csv_filename):
    """Single date input, single category output.

    Runs the experiment once with each date encoder ('wave', 'embed').

    :param csv_filename: path where the synthetic test CSV is written.
    """
    input_features = [date_feature()]
    output_features = [categorical_feature(vocab_size=2)]

    # Synthesize a CSV matching the declared feature set.
    data_path = generate_data(input_features, output_features, csv_filename)

    for date_encoder in ('wave', 'embed'):
        input_features[0]['encoder'] = date_encoder
        run_experiment(input_features, output_features, data_csv=data_path)
def test_experiment_multiple_seq_seq(csv_filename):
    """Many inputs, many outputs, three sequence-decoder variants.

    The same five-feature input set is run three times while the
    sequence output changes: default decoder, generator decoder, and
    generator decoder with ``reduce_input=None``.

    :param csv_filename: path where the synthetic test CSV is written.
    """
    input_features = [
        text_feature(vocab_size=100, min_len=1, encoder='stacked_cnn'),
        numerical_feature(),
        categorical_feature(vocab_size=10, embedding_size=5),
        set_feature(),
        sequence_feature(vocab_size=10, max_len=10, encoder='embed')
    ]

    # Variant 1: default sequence decoder.
    output_features = [
        categorical_feature(vocab_size=2, reduce_input='sum'),
        sequence_feature(vocab_size=10, max_len=5),
        numerical_feature()
    ]
    data_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=data_path)

    # Variant 2: generator decoder.
    output_features = [
        categorical_feature(vocab_size=2, reduce_input='sum'),
        sequence_feature(vocab_size=10, max_len=5, decoder='generator'),
        numerical_feature()
    ]
    data_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=data_path)

    # Variant 3: generator decoder with input reduction disabled.
    output_features = [
        categorical_feature(vocab_size=2, reduce_input='sum'),
        sequence_feature(max_len=5, decoder='generator', reduce_input=None),
        numerical_feature()
    ]
    data_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=data_path)
def test_experiment_image_inputs(csv_filename):
    """Image + text + numerical inputs, category + numerical outputs.

    Three passes: resnet encoder, stacked_cnn encoder, and stacked_cnn
    with images read from disk (``in_memory=False``). Generated images
    are removed afterwards.

    :param csv_filename: path where the synthetic test CSV is written.
    """
    img_folder = os.path.join(os.getcwd(), 'generated_images')

    input_features = [
        image_feature(
            folder=img_folder,
            encoder='resnet',
            preprocessing={
                'in_memory': True,
                'height': 8,
                'width': 8,
                'num_channels': 3
            },
            fc_size=16,
            num_filters=8
        ),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='zscore')
    ]
    output_features = [
        categorical_feature(vocab_size=2, reduce_input='sum'),
        numerical_feature()
    ]

    # Pass 1: resnet encoder.
    data_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=data_path)

    # Pass 2: stacked_cnn encoder.
    input_features[0]['encoder'] = 'stacked_cnn'
    data_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=data_path)

    # Pass 3: stacked_cnn encoder, images streamed from disk.
    input_features[0]['preprocessing']['in_memory'] = False
    data_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=data_path)

    # Remove the temporary image folder.
    shutil.rmtree(img_folder)
def test_server_integration(csv_filename):
    """Train a model and exercise its serving app's ``/predict`` route.

    Checks two cases: a POST with no payload returns the
    missing-features error, and a POST with a full form-encoded row
    returns one key per expected output. Cleans up the experiment
    directory and the generated images afterwards.

    :param csv_filename: path where the synthetic test CSV is written.
    """
    img_folder = os.path.join(os.getcwd(), 'generated_images')

    input_features = [
        image_feature(
            folder=img_folder,
            encoder='resnet',
            preprocessing={
                'in_memory': True,
                'height': 8,
                'width': 8,
                'num_channels': 3
            },
            fc_size=16,
            num_filters=8
        ),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='zscore')
    ]
    output_features = [
        categorical_feature(vocab_size=2, reduce_input='sum'),
        numerical_feature()
    ]

    data_path = generate_data(input_features, output_features, csv_filename)
    model = train_model(input_features, output_features, data_csv=data_path)

    client = TestClient(server(model))

    # A POST without data must report that features are missing.
    empty_response = client.post('/predict')
    assert empty_response.json() == ALL_FEATURES_PRESENT_ERROR

    # Send the first CSV row as a multipart form and compare the
    # response keys against the expected per-output-feature keys.
    first_row = read_csv(data_path).T.to_dict()[0]
    form_data, form_files = convert_to_form(first_row)
    predict_response = client.post('/predict', data=form_data, files=form_files)
    assert sorted(predict_response.json().keys()) == \
        sorted(output_keys_for(output_features))

    shutil.rmtree(model.exp_dir_name, ignore_errors=True)
    shutil.rmtree(img_folder)