def create_data_spec(layers, classification_problem, max_seq_len):
    '''
    Create the data specification corresponding to a BERT model

    Parameters
    ----------
    layers : dictionary
        Specifies the input/task layers.
    classification_problem : boolean
        Specifies whether the problem is classification (True) or
        regression (False).
    max_seq_len : int
        Specifies the maximum target sequence length.

    Returns
    -------
    data specification

    '''
    var_names = BertCommon['variable_names']

    # input variables: tokenization, position, and segment embedding
    token_data = [var_names['token_var']]
    position_data = [var_names['position_var']]
    segment_data = [var_names['segment_var']]

    # target variables (nominal names generated separately so each
    # DataSpec gets its own list object)
    target_data = generate_target_var_names(var_names, max_seq_len)
    nominal_data = generate_target_var_names(var_names, max_seq_len)

    specs = [
        DataSpec(type_='TEXT', layer=layers['token_input'], data=token_data),
        DataSpec(type_='TEXT', layer=layers['position_input'], data=position_data),
    ]

    # segment embedding input is optional
    if 'segment_input' in layers:
        specs.append(
            DataSpec(type_='TEXT', layer=layers['segment_input'], data=segment_data))

    # task layer: classification attaches nominal variables, regression does not
    task_parms = DataSpecNumNomOpts(length=var_names['target_len_var'],
                                    token_size=1)
    if classification_problem:
        specs.append(
            DataSpec(type_='NUMNOM',
                     layer=layers['task_layer'],
                     data=target_data,
                     nominals=nominal_data,
                     numeric_nominal_parms=task_parms))
    else:
        specs.append(
            DataSpec(type_='NUMNOM',
                     layer=layers['task_layer'],
                     data=target_data,
                     numeric_nominal_parms=task_parms))

    return specs
def test_model_conversion3(self):
    '''
    Import CNN image classification model and override attributes

    - instantiate a Keras LeNet model and translate to DLPy/Viya model
      override CNN model attributes with RNN attributes - never would
      be done in practice, just to verify that new attributes written

    NOTE: cannot attach weights unless both client and server share
          the same file system

    COVERAGE: from_keras_model(), load_weights() in network.py
              keras_to_sas() in sas_keras_parse.py
              write_keras_hdf5() in write_keras_model_parm.py
              all functions in model_conversion_utils.py
              CNN-related function in write_sas_code.py
    '''
    # environment prerequisites: server-side data dir, local lenet.h5, keras
    if self.data_dir is None:
        unittest.TestCase.skipTest(
            self, "DLPY_DATA_DIR is not set in the environment variables")

    if (self.data_dir_local is None) or (not os.path.isfile(
            os.path.join(self.data_dir_local, 'lenet.h5'))):
        unittest.TestCase.skipTest(
            self,
            "DLPY_DATA_DIR_LOCAL is not set in the environment variables or lenet.h5 file is missing"
        )

    if self.keras_installed:
        from keras.models import Sequential
        from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
    else:
        unittest.TestCase.skipTest(self, "keras is not installed")

    # build LeNet-style CNN (two conv/pool stages followed by dense layers)
    model = Sequential()
    model.add(
        Conv2D(20,
               kernel_size=(5, 5),
               strides=(1, 1),
               activation='relu',
               input_shape=(28, 28, 1),
               padding="same"))
    model.add(
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
    model.add(
        Conv2D(50,
               kernel_size=(5, 5),
               strides=(1, 1),
               activation='relu',
               padding='same'))
    model.add(
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
    model.add(Flatten())
    model.add(Dense(500, activation='relu'))
    model.add(Dense(10, activation='softmax'))

    model.load_weights(os.path.join(self.data_dir_local, 'lenet.h5'))
    model.summary()

    # translate Keras model to DLPy/Viya model, writing a weights file to cwd
    model_name = 'lenet'
    model1, use_gpu = Model.from_keras_model(
        conn=self.s,
        keras_model=model,
        output_model_table=model_name,
        include_weights=True,
        scale=1.0 / 255.0,
        input_weights_file=os.path.join(self.data_dir_local, 'lenet.h5'))

    if os.path.isdir(self.data_dir):
        # copy weights to the server-visible directory; copyfile raises
        # OSError (or its SameFileError subclass) on failure, so catch only
        # that instead of a bare except that would also swallow
        # KeyboardInterrupt/SystemExit
        try:
            copyfile(
                os.path.join(os.getcwd(), 'lenet_weights.kerasmodel.h5'),
                os.path.join(self.data_dir, 'lenet_weights.kerasmodel.h5'))
            copy_success = True
        except OSError:
            print(
                'Unable to copy weights file, skipping test of overriding attributes'
            )
            copy_success = False

        if copy_success:
            self.s.table.addcaslib(activeonadd=False,
                                   datasource={'srctype': 'path'},
                                   name='MODEL_CONVERT',
                                   path=self.data_dir,
                                   subdirectories=True)
            model1.load_weights(path=os.path.join(
                self.data_dir, 'lenet_weights.kerasmodel.h5'),
                                labels=False,
                                use_gpu=use_gpu)
            os.remove(
                os.path.join(self.data_dir, 'lenet_weights.kerasmodel.h5'))

            # parameters for (nonexistent) RNN layers
            feature_dim = 4
            # maximum sequence length
            max_seq_len = 100

            # define data specs needed to import Keras model weights:
            # one input variable per frame/feature pair, one target per frame
            tokensize = feature_dim
            inputs = [
                '_f%d_v%d_' % (fi, vi) for fi in range(max_seq_len)
                for vi in range(tokensize)
            ]
            targets = ['y%d' % i for i in range(max_seq_len)]

            data_spec = []
            data_spec.append(
                DataSpec(type_='NUMERICNOMINAL',
                         layer=model.layers[0].name + "_input",
                         data=inputs,
                         numeric_nominal_parms=DataSpecNumNomOpts(
                             length='_num_frames_',
                             token_size=feature_dim)))
            data_spec.append(
                DataSpec(type_='NUMERICNOMINAL',
                         layer=model.layers[-1].name,
                         data=targets,
                         nominals=targets,
                         numeric_nominal_parms=DataSpecNumNomOpts(
                             length='ylen', token_size=1)))

            # override model attributes with RNN-style attributes
            from dlpy.attribute_utils import create_extended_attributes
            create_extended_attributes(self.s, model_name, model1.layers,
                                       data_spec)

    # clean up the client-side weights file written by from_keras_model()
    if os.path.isfile(
            os.path.join(os.getcwd(), 'lenet_weights.kerasmodel.h5')):
        os.remove(os.path.join(os.getcwd(), 'lenet_weights.kerasmodel.h5'))

    # clean up model table
    model_tbl_opts = input_table_check(model_name)
    self.s.table.droptable(quiet=True, **model_tbl_opts)

    # clean up models
    del model
    del model1
def test_model_conversion2(self):
    '''
    Import RNN sequence to sequence models

    - instantiate Keras RNN models and translate to DLPy/Viya models

    NOTE: cannot attach weights unless both client and server share
          the same file system

    COVERAGE: from_keras_model(), load_weights() in network.py
              keras_to_sas() in sas_keras_parse.py
              write_keras_hdf5() in write_keras_model_parm.py
              all functions in model_conversion_utils.py
              RNN-related functions in write_sas_code.py
    '''
    # environment prerequisites: server-side data dir, local lenet.h5, keras
    if self.data_dir is None:
        unittest.TestCase.skipTest(
            self, "DLPY_DATA_DIR is not set in the environment variables")

    if (self.data_dir_local is None) or (not os.path.isfile(
            os.path.join(self.data_dir_local, 'lenet.h5'))):
        unittest.TestCase.skipTest(
            self,
            "DLPY_DATA_DIR_LOCAL is not set in the environment variables or lenet.h5 file is missing"
        )

    if not self.keras_installed:
        unittest.TestCase.skipTest(self, "keras is not installed")

    # parameters for RNN layers
    rnn_size = 10
    feature_dim = 4
    # output classes
    output_dim = 29
    # maximum sequence length
    max_seq_len = 100

    # define data specs needed to import Keras model weights:
    # one input variable per frame/feature pair, one target per frame
    tokensize = feature_dim
    inputs = [
        '_f%d_v%d_' % (fi, vi) for fi in range(max_seq_len)
        for vi in range(tokensize)
    ]
    targets = ['y%d' % i for i in range(max_seq_len)]

    data_spec = []
    data_spec.append(
        DataSpec(type_='NUMERICNOMINAL',
                 layer='the_input',
                 data=inputs,
                 numeric_nominal_parms=DataSpecNumNomOpts(
                     length='_num_frames_', token_size=feature_dim)))
    data_spec.append(
        DataSpec(type_='NUMERICNOMINAL',
                 layer='out',
                 data=targets,
                 nominals=targets,
                 numeric_nominal_parms=DataSpecNumNomOpts(length='ylen',
                                                          token_size=1)))

    # try all RNN model types, both unidirectional and bidirectional
    for layer_type in [
            'simplernn', 'lstm', 'gru', 'cudnnlstm', 'cudnngru'
    ]:
        for bidirectional in [True, False]:

            model = define_keras_rnn_model(layer_type, bidirectional,
                                           rnn_size, feature_dim,
                                           output_dim)

            model_name = 'dlpy_model'
            model1, use_gpu = Model.from_keras_model(
                conn=self.s,
                keras_model=model,
                max_num_frames=max_seq_len,
                include_weights=True,
                output_model_table=model_name)
            model1.print_summary()

            # try to load weights, but skip any GPU-based models because
            # worker/soloist may not have GPU
            if os.path.isdir(self.data_dir) and (not use_gpu):
                # copyfile raises OSError (or its SameFileError subclass)
                # on failure; catch only that rather than a bare except
                # that would also swallow KeyboardInterrupt/SystemExit
                try:
                    copyfile(
                        os.path.join(os.getcwd(),
                                     'dlpy_model_weights.kerasmodel.h5'),
                        os.path.join(self.data_dir,
                                     'dlpy_model_weights.kerasmodel.h5'))
                    copy_success = True
                except OSError:
                    print(
                        'Unable to copy weights file, skipping test of attaching weights'
                    )
                    copy_success = False

                if copy_success:
                    model1.load_weights(path=os.path.join(
                        self.data_dir, 'dlpy_model_weights.kerasmodel.h5'),
                                        labels=False,
                                        use_gpu=use_gpu)
                    os.remove(
                        os.path.join(self.data_dir,
                                     'dlpy_model_weights.kerasmodel.h5'))
            else:
                print('GPU model, skipping test of attaching weights')

            # clean up the client-side weights file written by from_keras_model()
            if os.path.isfile(
                    os.path.join(os.getcwd(),
                                 'dlpy_model_weights.kerasmodel.h5')):
                os.remove(
                    os.path.join(os.getcwd(),
                                 'dlpy_model_weights.kerasmodel.h5'))

            # clean up models
            del model
            del model1

            # clean up model table
            model_tbl_opts = input_table_check(model_name)
            self.s.table.droptable(quiet=True, **model_tbl_opts)