def ImageCaptioning(conn, model_name='image_captioning', num_blocks=3, neurons=50,
                    rnn_type='LSTM', max_output_len=15):
    '''
    Builds an RNN to be used for image captioning

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_name : string, optional
        Specifies output name of the model
        Default: 'image_captioning'
    num_blocks : int, optional
        Specifies number of samelength recurrent layers
        Default: 3
    neurons : int, optional
        Specifies number of neurons in each layer
        Default: 50
    rnn_type : string, optional
        Specifies the type of the rnn layer.
        Possible Values: RNN, LSTM, GRU
        Default: LSTM
    max_output_len : int, optional
        Specifies max number of tokens to generate in the final layer (i.e. max caption length)
        Default: 15

    Returns
    -------
    :class:`Sequential`

    '''
    if num_blocks < 1:
        raise DLPyError('num_blocks must be at least 1')

    model = Sequential(conn, model_table=model_name)

    model.add(InputLayer(name='input'))
    print('InputLayer added named "input"')
    for i in range(num_blocks):
        model.add(Recurrent(n=neurons, init='msra', rnn_type=rnn_type,
                            output_type='samelength'))
    model.add(Recurrent(n=neurons, init='msra', rnn_type=rnn_type,
                        output_type='encoding'))
    model.add(Recurrent(n=neurons, init='msra', rnn_type=rnn_type,
                        output_type='arbitrarylength', max_output_length=max_output_len))
    model.add(OutputLayer(name='output'))
    print('OutputLayer added named "output"')

    return model
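# Example (illustrative sketch): building and inspecting the captioning RNN.
# The CAS host, port, and variable names below are hypothetical placeholders.
#
#     import swat
#     conn = swat.CAS('my-cas-host.example.com', 5570)   # hypothetical host/port
#     capt_model = ImageCaptioning(conn, num_blocks=2, neurons=100,
#                                  rnn_type='GRU', max_output_len=20)
#     capt_model.print_summary()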
def reshape_caption_columns(conn, table, caption_col_name='Var', num_captions=5):
    '''
    Reshapes table so there is only one caption per row of the table

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    table : CASTable or string
        Specifies name of CASTable containing the merged captions, features, and objects
    caption_col_name : string, optional
        Specifies basename of columns that contain captions
        Default: 'Var'
    num_captions : int, optional
        Specifies number of captions per image
        Default: 5

    Returns
    -------
    :class:`CASTable`

    '''
    # convert table to one caption per line
    columns = list(table.columns)
    if '{}0'.format(caption_col_name) not in columns:
        raise DLPyError('caption_col_name {} does not exist in the table'.format(caption_col_name))
    capt_idx_start = columns.index('{}0'.format(caption_col_name))
    # initialize new_tbl dictionary with columns
    new_tbl = dict()
    for c in columns:
        if caption_col_name not in c:
            new_tbl[c] = []
    new_tbl['caption'] = []
    # make list of rows containing only one caption each
    new_tbl_list = list()
    rows = (table.values).tolist()
    try:
        for row in rows:
            for i in range(num_captions):
                new_row = []
                for j in range(len(row)):
                    if j not in range(capt_idx_start, capt_idx_start + num_captions):
                        new_row.append(row[j])
                new_row.append(row[capt_idx_start + i])
                new_tbl_list.append(new_row)
    except IndexError:
        raise DLPyError("Wrong number of captions specified")
    # add values to dictionary
    for row in new_tbl_list:
        cnt = 0
        for key in new_tbl.keys():
            new_tbl[key].append(row[cnt])
            cnt += 1
    # create CASTable from dictionary
    rnn_input = CASTable.from_dict(conn, new_tbl)

    return rnn_input
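# For intuition, a hypothetical wide row with num_captions=2 becomes two long
# rows: every non-caption column is repeated and the VarN columns collapse
# into a single 'caption' column.
#
#     _filename_0  Var0     Var1               _filename_0  caption
#     img1.jpg     'a dog'  'brown dog'   ->   img1.jpg     'a dog'
#                                              img1.jpg     'brown dog'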
def create_captioning_table(conn, image_table, features_model, captions_file,
                            obj_detect_model=None, word_embeddings_file=None,
                            num_captions=5, dense_layer='fc7', captions_delimiter='\t',
                            caption_col_name='Var', embeddings_delimiter='\t',
                            n_threads=None, gpu=None):
    '''
    Builds CASTable with all necessary info to train an image captioning model

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    image_table : imageTable
        Specifies name of CASTable that contains images to be used for training
    features_model : dlpy Model object
        Specifies CNN model to use for extracting features
    captions_file : string
        Specifies absolute path to file containing image filenames and captions.
        Client should have access to this file.
    obj_detect_model : CASTable or string, optional
        Specifies CASTable containing model parameters for the object detection model
        Default: None
    word_embeddings_file : string, optional
        Specifies full path to file containing pre-trained word vectors to be used
        for text generation. This file should be accessible from the client.
        Required if obj_detect_model is not None
        Default: None
    num_captions : int, optional
        Specifies number of captions for each image in the captions file
        Default: 5
    dense_layer : string, optional
        Specifies layer from CNN model to extract features from
        Default: 'fc7'
    captions_delimiter : string, optional
        Specifies delimiter between filenames and captions in the image captions text file
        Default: '\t'
    caption_col_name : string, optional
        Specifies base name for column names for the columns containing captions
        Default: 'Var'
    embeddings_delimiter : string, optional
        Specifies delimiter used in word embeddings file
        Default: '\t'
    n_threads : int, optional
        Specifies the number of threads to use when scoring the table.
        All cores available are used when nothing is set.
        Default: None
    gpu : Gpu, optional
        When specified, determines which gpu to use when scoring the table.
        GPU=1 uses all available GPU devices and default parameters.
        Default: None

    Returns
    -------
    :class:`CASTable`

    '''
    # get all necessary tables
    image_features = get_image_features(conn, features_model, image_table, dense_layer)
    captions_table = create_captions_table(conn, captions_file, delimiter=captions_delimiter,
                                           caption_col_name=caption_col_name)

    # merge features and captions tables
    df1 = captions_table.to_frame()
    df2 = image_features.to_frame()
    captions_features = pd.merge(df1, df2, left_on='_filename_0',
                                 right_on='_filename_0', how='left')
    result = conn.upload_frame(captions_features,
                               casout=dict(name='captions_features', replace=True))
    # conn.dljoin(table=captions_table, annotatedTable=image_features,
    #             id='_filename_0', casOut=dict(name='captions_features', replace=True))
    # result = conn.CASTable('captions_features')

    if obj_detect_model is not None:
        if word_embeddings_file is None:
            raise DLPyError("word_embeddings_file required for object detection")
        else:
            # resize images for object detection scoring
            detected_objects = create_embeddings_from_object_detection(
                conn, image_table, obj_detect_model, word_embeddings_file,
                word_delimiter=embeddings_delimiter, n_threads=n_threads, gpu=gpu)
            # conn.dljoin(table=dict(name='captions_features'), annotatedTable=detected_objects,
            #             id='_filename_0', casOut=dict(name='obj_capt_feats', replace=True))
            df1 = detected_objects.to_frame()
            df2 = result.to_frame()
            obj_capt_feat = pd.merge(df1, df2, left_on='_filename_0',
                                     right_on='_filename_0', how='left')
            result = conn.upload_frame(obj_capt_feat,
                                       casout=dict(name='full_table', replace=True))

    final_table = reshape_caption_columns(conn, result, caption_col_name=caption_col_name,
                                          num_captions=num_captions)
    drop_columns = set(final_table.columns) - set(captions_table.columns) - set(image_features.columns)
    if obj_detect_model:
        drop_columns = set(drop_columns) - set(detected_objects.columns)
    drop_columns.remove('caption')
    final_table.drop(drop_columns, axis=1, inplace=True)

    return final_table
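# Example (illustrative sketch): assembling the captioning training table from
# a pre-trained CNN. The weights file path and table names are hypothetical.
#
#     from dlpy.applications import VGG16
#     features_model = VGG16(conn, model_table='VGG16', pre_trained_weights=True,
#                            pre_trained_weights_file='/path/to/vgg16_weights.h5')
#     train_tbl = create_captioning_table(conn, image_table=my_image_tbl,
#                                         features_model=features_model,
#                                         captions_file='/path/to/captions.txt',
#                                         num_captions=5, dense_layer='fc7')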
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

''' Convert Keras models to SAS models '''

import os
from keras import backend as K
from distutils.version import StrictVersion
import keras
from dlpy.utils import DLPyError

if StrictVersion(keras.__version__) < '2.1.3' or StrictVersion(keras.__version__) > '2.1.6':
    raise DLPyError('This Keras version (' + keras.__version__ + ') is not supported, '
                    'please use a version >= 2.1.3 and <= 2.1.6')

from .write_keras_model_parm import write_keras_hdf5
from .write_sas_code import (write_input_layer, write_convolution_layer,
                             write_batch_norm_layer, write_pooling_layer,
                             write_residual_layer, write_full_connect_layer,
                             write_concatenate_layer, write_main_entry)

computation_layer_classes = ['averagepooling2d', 'maxpooling2d', 'conv2d', 'dense',
                             'batchnormalization', 'add', 'concatenate',
                             'globalaveragepooling2d']
dropout_layer_classes = ['averagepooling2d', 'maxpooling2d', 'conv2d', 'dense']


class KerasParseError(ValueError):
    '''
def convert_one_audio_file(local_audio_file, converted_local_audio_file):
    '''
    Convert a local audio file into a wav format that only contains
    1 channel with 16 bits and 16K HZ.

    Parameters
    ----------
    local_audio_file : string
        Local location of the audio file to be converted.
    converted_local_audio_file : string
        Local location to store the converted audio file

    Returns
    -------
    None

    Raises
    ------
    DLPyError
        If anything goes wrong, it complains and prints the appropriate message.

    '''
    try:
        import soundfile as sf
    except (ModuleNotFoundError, ImportError):
        raise DLPyError('cannot import soundfile')

    audio_name = os.path.basename(local_audio_file)
    output_dir = os.path.dirname(converted_local_audio_file)

    required_sr = 16000
    required_sw = 2

    # check whether the audio file is a wave format
    audio_ext = os.path.splitext(audio_name)[-1]
    audio_name = os.path.splitext(audio_name)[0]
    if audio_ext.lower() != '.wav':
        audio_wav_file = os.path.join(output_dir, random_name(audio_name, 6) + '.wav')
        data, sampling_rate = sf.read(local_audio_file)
        sf.write(audio_wav_file, data, sampling_rate)
    else:
        audio_wav_file = local_audio_file

    # convert the wav file to the required format: 1 channel, 16 bits, and 16K HZ
    wave_reader, wave_params = read_audio(audio_wav_file)
    is_framerate_desired = check_framerate(wave_params, required_sr)
    is_sampwidth_desired = check_sampwidth(wave_params, required_sw)
    is_stereo = check_stereo(wave_params)

    if converted_local_audio_file == audio_wav_file:
        real_converted_local_audio_file = converted_local_audio_file + '.tmp'
    else:
        real_converted_local_audio_file = converted_local_audio_file

    with wave.open(real_converted_local_audio_file, "wb") as wave_writer:
        wave_writer.setnchannels(1)
        wave_writer.setframerate(required_sr)
        # 16 bits
        wave_writer.setsampwidth(2)
        wave_writer.setcomptype(wave_params.comptype, wave_params.compname)
        fragment = wave_reader.readframes(wave_params.nframes)
        # 1 channel
        if is_stereo:
            fragment = convert_stereo_to_mono(fragment, wave_params.sampwidth)
        # 16K HZ
        if not is_framerate_desired:
            fragment = convert_framerate(fragment, wave_params.sampwidth, 1,
                                         wave_params.framerate, required_sr)
        # 16 bits
        if not is_sampwidth_desired:
            fragment = convert_sampwidth(fragment, wave_params.sampwidth, required_sw)
        wave_writer.writeframes(fragment)
    wave_reader.close()

    # remove the temporary wav file
    if audio_wav_file != local_audio_file:
        os.remove(audio_wav_file)

    # rename the file to the desired one
    if real_converted_local_audio_file != converted_local_audio_file:
        os.replace(real_converted_local_audio_file, converted_local_audio_file)
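# Example (illustrative sketch): converting an arbitrary recording into the
# 16 kHz / 16-bit / mono wav layout produced above. The file paths below are
# hypothetical.
#
#     convert_one_audio_file('/tmp/interview.mp3', '/tmp/interview_16k.wav')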
def extract_pytorch_parms(pymodel, layer_name, layer_type, layer_dim, layer_keywords,
                          extra_keywords=None):
    '''
    Extract correct tensor(s) from a PyTorch model state dictionary

    Parameters
    ----------
    pymodel : PyTorch model
        Specifies the PyTorch model object.
    layer_name : string
        Specifies the PyTorch layer name.
    layer_type : int
        Specifies the layer type (see BertCommon for layer types).
    layer_dim : list of int
        Specifies the dimensions of a tensor.
    layer_keywords : list of strings
        Specifies the keywords to search for in the model dictionary.
    extra_keywords : list of strings or None, optional
        Specifies the extra keywords for a multi-head attention layer.
        This is mandatory for multi-head attention and any other layer(s)
        where there could be ambiguity between two layers with the same type.
        Default: None

    Returns
    -------
    weight, bias parameters

    '''
    ptensor_wgt = None
    ptensor_bias = None
    if layer_type == BertCommon['layer_types']['noparms']:
        pass
    elif layer_type == BertCommon['layer_types']['layernorm']:
        if extra_keywords is None:
            key_list = layer_keywords + ['weight']
        else:
            key_list = layer_keywords + extra_keywords + ['weight']

        # weights
        pname, ptensor_wgt = find_pytorch_tensor(pymodel, key_list, layer_dim)
        if pname is None:
            raise DLPyError('Cannot find weights for layer ' + layer_name)

        # bias
        key_list[-1] = 'bias'
        pname, ptensor_bias = find_pytorch_tensor(pymodel, key_list, layer_dim)
        if pname is None:
            print('NOTE: No bias for layer ' + layer_name)
    elif layer_type == BertCommon['layer_types']['dense']:
        if extra_keywords is None:
            key_list = layer_keywords + ['weight']
        else:
            key_list = layer_keywords + extra_keywords + ['weight']

        # weights
        pname, ptensor_wgt = find_pytorch_tensor(pymodel, key_list, layer_dim)
        if pname is None:
            raise DLPyError('Cannot find weights for layer ' + layer_name)

        # bias
        # NOTE: bias name and dimensions not unique in attention layer so construct
        # bias tensor name from weight tensor name
        bias_str = pname[0].replace('weight', 'bias')
        if bias_str in pymodel.state_dict():
            ptensor_bias = [pymodel.state_dict()[bias_str].numpy()]
        else:
            print('NOTE: No bias for layer ' + layer_name)
    elif layer_type == BertCommon['layer_types']['mhattention']:
        # NOTE: for affine transformations, PyTorch uses a linear layer that implements
        # the following operation
        #
        #     y = x*A^T + b
        #
        # where A is stored in the form (output_dimension, input_dimension) in the
        # state dictionary.
        #
        # SAS Deep Learning implements an affine transformation as follows
        #
        #     y = A*x + b
        #
        # where A is stored in the form (output_dimension, input_dimension) in
        # column-major order.
        #
        # For dense (fully-connected) layers, the key is the A matrix which is stored
        # identically in both cases, so it can be imported directly without any
        # manipulation. The PyTorch version of multi-head attention uses several linear
        # layers in the implementation. The SAS Deep Learning version is self-contained,
        # and implements something like
        #
        #     y = x*A + b
        #
        # for these linear layers. This means that the A matrices used by the SAS Deep
        # Learning version of multi-head attention must be transposed before importing.
        for ii, mha_keys in enumerate(extra_keywords):
            # weights
            pname, tmp_wgt = find_pytorch_tensor(pymodel,
                                                 layer_keywords + mha_keys + ['weight'],
                                                 layer_dim)
            if pname is None:
                raise DLPyError('Cannot find ' + str(mha_keys) + ' weights for layer ' +
                                layer_name)
            else:
                if len(tmp_wgt) > 1:
                    raise DLPyError('Multiple matches for ' + str(mha_keys) +
                                    ' weights for layer ' + layer_name)
                else:
                    if ii == 0:
                        tensor_wgt = np.transpose(tmp_wgt[0].copy())
                    else:
                        tensor_wgt = np.concatenate((tensor_wgt, np.transpose(tmp_wgt[0])),
                                                    axis=1)

            # bias
            pname, tmp_bias = find_pytorch_tensor(pymodel,
                                                  layer_keywords + mha_keys + ['bias'],
                                                  [layer_dim[0]])
            if pname is None:
                print('NOTE: No ' + str(mha_keys) + ' bias for layer ' + layer_name)
            else:
                if 'tensor_bias' in locals():
                    tensor_bias = np.concatenate((tensor_bias, tmp_bias[0]))
                else:
                    tensor_bias = tmp_bias[0].copy()

        ptensor_wgt = [tensor_wgt]
        if 'tensor_bias' in locals():
            ptensor_bias = [tensor_bias]
    else:
        raise DLPyError('Layer ' + layer_name + ' is an unsupported layer type')

    return ptensor_wgt, ptensor_bias
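# A minimal sketch of the transpose-and-concatenate convention described above,
# using dummy NumPy tensors in place of a real PyTorch state dictionary:
#
#     import numpy as np
#     hidden = 4
#     q_wgt = np.random.rand(hidden, hidden)   # PyTorch stores (out_dim, in_dim)
#     k_wgt = np.random.rand(hidden, hidden)
#     v_wgt = np.random.rand(hidden, hidden)
#     # SAS multi-head attention expects the transposed matrices side by side
#     sas_wgt = np.concatenate((q_wgt.T, k_wgt.T, v_wgt.T), axis=1)
#     assert sas_wgt.shape == (hidden, 3 * hidden)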
def bert_prepare_data(conn, tokenizer, max_seq_len, input_a, segment_vocab_size=None,
                      input_b=None, target=None, obs_weight=None, extra_var=None,
                      neutral_label=None, train_fraction=None, classification_problem=True,
                      seed=777777777, verbose=False):
    '''
    Prepare data for a BERT model variant

    Parameters
    ----------
    conn : CAS Connection
        Specifies the CAS connection
    tokenizer : :class:`PreTrainedTokenizer` object
        Specifies the tokenizer.
    max_seq_len : int
        Specifies the maximum sequence length (maximum number of tokens).
    input_a : list of strings
        Specifies the text data for a single segment task.
    segment_vocab_size : int
        Specifies the segment vocabulary size. The value should be one of
        0 for DistilBERT, 1 for RoBERTa, or 2 for BERT.
        Default: None
    input_b : list of strings, optional
        Specifies the text data for a two segment task.
        Default: None
    target : list or list of lists, optional
        Specifies the target data. Target data must be a numeric type. This means
        that nominal values must be translated to integer class levels.
        Default: None
    obs_weight : list of floats/integers, optional
        Specifies the observation weights.
        Default: None
    extra_var : list of dictionaries, optional
        Specifies the extra variable(s) to include in the Viya table(s).
        Each dictionary in the list must have the following keys:
        name : string, specifies the name of the extra variable
        values : list, specifies the variable values
        type : string, must be either VARCHAR for character values or NUMERIC
        for numeric values
        Default: None
    neutral_label : string, optional
        Specifies the "don't care" or neutral target label for multi-target
        classification tasks. This is not optional if target is a list of lists.
        Default: None
    train_fraction : float, optional
        Specifies the fraction of the data used for training.
        Must be between 0.0 and 1.0.
        Default: None
    classification_problem : boolean, optional
        Specifies whether the data is for a classification or regression problem.
        Default: True
    seed : int, optional
        Specifies the seed to use for the random number generator for splitting
        data into train and test data sets.
        Default: 777777777
    verbose : boolean, optional
        Specifies whether progress messages and summary statistics are displayed.
        Default: False

    Returns
    -------
    -> number of target variables (if target specified) or None.
    -> if train fraction specified: names of the Viya tables that hold the
       training and test/validation data sets
       otherwise: name of the data set

    '''
    # define input variables
    ds_vars = dict(token_var=BertCommon['variable_names']['token_var'],
                   position_var=BertCommon['variable_names']['position_var'],
                   segment_var=BertCommon['variable_names']['segment_var'])

    # error checking
    if not isinstance(input_a, list):
        raise DLPyError('Input A must be a list')

    if input_b is not None:
        if not isinstance(input_b, list):
            raise DLPyError('Input B must be a list')
        if len(input_a) != len(input_b):
            raise DLPyError("Mismatch in lengths of input A and input B lists")

    if target is not None:
        if not isinstance(target, list):
            raise DLPyError('Target must be a list')
        if len(input_a) != len(target):
            raise DLPyError("Mismatch in lengths of input A and target lists")

        # target variable and length variable
        ds_vars['target_var'] = BertCommon['variable_names']['target_var']
        ds_vars['target_len_var'] = BertCommon['variable_names']['target_len_var']

    if obs_weight is not None:
        if not isinstance(obs_weight, list):
            raise DLPyError('Observation weights must be a list')
        if len(input_a) != len(obs_weight):
            raise DLPyError("Mismatch in lengths of input A and observation weight lists")
        if target is None:
            raise DLPyError("Weight specified without target variable.")

        # weight variable
        ds_vars['weight_var'] = BertCommon['variable_names']['weight_var']

    if extra_var is not None:
        extra_var_names = [None] * len(extra_var)
        extra_var_types = [None] * len(extra_var)
        if not isinstance(extra_var, list):
            raise DLPyError('Extra variables must be a list')
        for ii, ev_dict in enumerate(extra_var):
            if not isinstance(ev_dict, dict):
                raise DLPyError('Argument extra_var must be a list of dictionaries')

            if 'name' in ev_dict:
                extra_var_names[ii] = ev_dict['name']
            else:
                raise DLPyError('extra_var[' + str(ii) + '] missing "name" key.')

            if ('type' in ev_dict) and (ev_dict['type'].upper() in ['VARCHAR', 'NUMERIC']):
                extra_var_types[ii] = ev_dict['type'].upper()
            else:
                raise DLPyError('extra_var[' + str(ii) + '] missing "type" key, or an '
                                'invalid type was specified.')

            if ('values' not in ev_dict) or (not (isinstance(ev_dict['values'], list) and
                                                  (len(input_a) == len(ev_dict['values'])))):
                raise DLPyError('extra_var[' + str(ii) + '] missing "values" key, the values '
                                'are not a list object, or there is a mismatch in lengths of '
                                'input A and values lists.')
    else:
        extra_var_names = None
        extra_var_types = None

    if (train_fraction is not None) and ((train_fraction < 0.0) or (train_fraction > 1.0)):
        raise DLPyError('train_fraction must be between 0 and 1')

    if segment_vocab_size is None:
        raise DLPyError("You must specify a segment vocabulary size. See the Bert model "
                        "configuration object (e.g. BertConfig['type_vocab_size']) for the "
                        "correct value.")
    else:
        if segment_vocab_size not in [0, 1, 2]:
            raise DLPyError('Vocabulary size ' + str(segment_vocab_size) + ' is invalid. '
                            'The value must be 0, 1, or 2.')

    # initialize lists
    token_strings = [None] * len(input_a)
    position_strings = [None] * len(input_a)
    if segment_vocab_size > 0:
        segment_strings = [None] * len(input_a)
    if target is not None:
        target_array = [None] * len(input_a)
        tgtlen_array = [None] * len(input_a)
    if obs_weight is not None:
        weight_array = [None] * len(input_a)
    if extra_var is not None:
        extra_var_array = [None] * len(input_a)

    num_truncated = 0
    obs_idx = 0
    ten_percent = int(0.1 * len(input_a))
    multiple_targets = False
    for ii, txt_a in enumerate(input_a):

        # provide feedback
        if verbose:
            if (ii > 0) and (ii % ten_percent == 0):
                print("NOTE: " + str(int(round(ii * 100.0 / len(input_a)))) +
                      "% of the observations tokenized.")

        # simple data cleaning, skip observations where input A is invalid
        if len(txt_a) == 0:
            continue
        else:
            txt_a_untok = txt_a
            txt_a = tokenizer.tokenize(txt_a)
            # NOTE: this suppresses an unnecessary logger warning
            txt_a = txt_a[:min([max_seq_len, len(txt_a)])]

        # simple data cleaning, skip observations where input B is invalid
        if input_b is not None:
            txt_b = input_b[ii]
            txt_b_untok = txt_b
            if len(txt_b) == 0:
                continue
            else:
                txt_b = tokenizer.tokenize(txt_b)
                # NOTE: this suppresses an unnecessary logger warning
                txt_b = txt_b[:min([max_seq_len, len(txt_b)])]
        else:
            txt_b = None
            txt_b_untok = None

        # simple data cleaning, skip observations where target is invalid (i.e. not numeric data)
        if target is not None:
            cur_tgt = target[ii]
            if isinstance(cur_tgt, list):
                tst_val = cur_tgt[0]
            else:
                tst_val = cur_tgt

            if not isinstance(tst_val, Number):
                continue
        else:
            cur_tgt = None

        # observation weight
        if obs_weight is not None:
            cur_wgt = obs_weight[ii]
            if not isinstance(cur_wgt, Number):
                raise DLPyError('Observation weights must be a numeric type.')
        else:
            cur_wgt = None

        # extra variable(s)
        if extra_var is not None:
            cur_extra_var = [None] * len(extra_var)
            for jj, ev_dict in enumerate(extra_var):
                cur_extra_var[jj] = ev_dict['values'][ii]
        else:
            cur_extra_var = None

        # tokenize text
        txt_encoding = tokenizer.encode_plus(txt_a, text_pair=txt_b,
                                             add_special_tokens=True,
                                             return_special_tokens_mask=True,
                                             max_length=max_seq_len)
        tmp_tokenized_text = tokenizer.convert_ids_to_tokens(txt_encoding['input_ids'])

        # set segment ID
        if segment_vocab_size == 2:
            seg_idx = txt_encoding['token_type_ids']
        elif segment_vocab_size == 1:
            seg_idx = [0] * len(tmp_tokenized_text)
        else:
            seg_idx = None

        # check for truncated sequence(s)
        if 'num_truncated_tokens' in txt_encoding:
            num_truncated += 1

        # tokenization error-checking
        num_tokens = len(tmp_tokenized_text)
        tokenized_text = [None] * num_tokens
        for jj in range(num_tokens):
            if tmp_tokenized_text[jj] in BertCommon['reserved_names']:
                raise DLPyError('Match for reserved names: ' + tmp_tokenized_text[jj])
            elif tmp_tokenized_text[jj] in BertCommon['special_chars']:
                tokenized_text[jj] = '[' + tmp_tokenized_text[jj] + ']'
            else:
                tokenized_text[jj] = tmp_tokenized_text[jj]

        # verify targets match inputs for sequence labeling tasks (assume single segment only for now)
        if isinstance(cur_tgt, list):
            multiple_targets = True
            if neutral_label is None:
                raise DLPyError("Neutral label must be specified for sequence labeling tasks.")

            if txt_b_untok is None:
                num_words = len(txt_a_untok.split(BertCommon['text_delimiter']))
            else:
                num_words = (len(txt_a_untok.split(BertCommon['text_delimiter'])) +
                             len(txt_b_untok.split(BertCommon['text_delimiter'])))
            num_tgts = len(cur_tgt)
            if num_words != num_tgts:
                raise DLPyError("Mismatch in length of input/target for observation " + str(ii))

            # tokenization adds special tokens and may split words into multiple tokens. Add
            # neutral labels for special tokens and repeat target labels for words split by
            # tokenization.
            new_tgt = [neutral_label if mask == 1 else None
                       for mask in txt_encoding['special_tokens_mask']]
            txt_words = txt_a_untok.split(BertCommon['text_delimiter'])
            if txt_b_untok is not None:
                txt_words += txt_b_untok.split(BertCommon['text_delimiter'])

            idx = 0
            for cur_word, cur_label in zip(txt_words, cur_tgt):
                # skip over special token(s)
                if txt_encoding['special_tokens_mask'][idx] == 1:
                    idx += [jj for jj, val in
                            enumerate(txt_encoding['special_tokens_mask'][idx:])
                            if val == 0][0]

                word_tokens = tokenizer.tokenize(cur_word)
                new_tgt[idx:idx + len(word_tokens)] = [cur_label] * len(word_tokens)
                idx += len(word_tokens)

            cur_tgt = new_tgt.copy()

        # check for defective observation (i.e. must have at least beginning and ending
        # "special" tokens for a valid observation (e.g. [CLS] tok1 tok2 ... [SEP] for
        # a BERT model)
        if sum(txt_encoding['special_tokens_mask']) >= 2:
            token_strings[obs_idx] = BertCommon['text_delimiter'].join(tokenized_text)

            # position
            tokenized_position = [None] * num_tokens
            for jj in range(num_tokens):
                tokenized_position[jj] = (BertCommon['reserved_names']['position_prefix'] +
                                          str(jj))
            position_strings[obs_idx] = BertCommon['text_delimiter'].join(tokenized_position)

            # segment
            if segment_vocab_size > 0:
                tokenized_segment = [None] * num_tokens
                for jj in range(num_tokens):
                    tokenized_segment[jj] = (BertCommon['reserved_names']['segment_prefix'] +
                                             str(seg_idx[jj]))
                segment_strings[obs_idx] = BertCommon['text_delimiter'].join(tokenized_segment)

            # target
            if cur_tgt is not None:
                if classification_problem:
                    if isinstance(cur_tgt, list):
                        # zero pad target list
                        target_array[obs_idx] = [str(0)] * max_seq_len
                        for jj, tgt in enumerate(cur_tgt):
                            target_array[obs_idx][jj] = str(int(tgt))
                        tgtlen_array[obs_idx] = len(cur_tgt)
                    else:
                        target_array[obs_idx] = str(int(cur_tgt))
                        tgtlen_array[obs_idx] = 1
                else:
                    if isinstance(cur_tgt, list):
                        raise DLPyError('Multiple regression problems not supported.')
                    else:
                        target_array[obs_idx] = cur_tgt
                        tgtlen_array[obs_idx] = 1

            # weight
            if cur_wgt is not None:
                weight_array[obs_idx] = cur_wgt

            # extra variable(s)
            if cur_extra_var is not None:
                extra_var_array[obs_idx] = cur_extra_var

            # increment the valid observation index
            obs_idx += 1
        else:
            print('WARNING: observation #: ' + str(ii))
            raise DLPyError('Input string could not be tokenized.')

    if verbose:
        print("NOTE: all observations tokenized.\n")

    # reduce lists and inform user if one or more observations discarded
    if obs_idx < len(input_a):
        token_strings = token_strings[0:obs_idx]
        position_strings = position_strings[0:obs_idx]
        if segment_vocab_size > 0:
            segment_strings = segment_strings[0:obs_idx]
        if target is not None:
            target_array = target_array[0:obs_idx]
            tgtlen_array = tgtlen_array[0:obs_idx]
        if obs_weight is not None:
            weight_array = weight_array[0:obs_idx]
        if extra_var is not None:
            extra_var_array = extra_var_array[0:obs_idx]

        print('NOTE: observations with empty/invalid input or targets were discarded. There are\n'
              '' + str(obs_idx) + ' out of ' + str(len(input_a)) + ' observations remaining.\n')

    # inform user if one or more observations truncated
    if num_truncated > 0:
        print('WARNING: ' + str(num_truncated) + ' out of ' + str(len(input_a)) +
              ' observations exceeded the maximum sequence length\n'
              'These observations have been truncated so that only the first ' +
              str(max_seq_len) + ' tokens are used.\n')

    # set up variable names/types
    if segment_vocab_size > 0:
        var_names = [ds_vars['token_var'], ds_vars['position_var'], ds_vars['segment_var']]
        var_type = ['VARCHAR', 'VARCHAR', 'VARCHAR']
    else:
        var_names = [ds_vars['token_var'], ds_vars['position_var']]
        var_type = ['VARCHAR', 'VARCHAR']
    num_target_var = None
    if target is not None:
        if multiple_targets:
            num_target_var = max_seq_len
        else:
            num_target_var = 1

        var_names += generate_target_var_names(ds_vars, num_target_var)
        var_names += [ds_vars['target_len_var']]
        if classification_problem:
            var_type += ['VARCHAR'] * num_target_var + ['NUMERIC']
        else:
            var_type += ['NUMERIC'] * num_target_var + ['NUMERIC']
    if obs_weight is not None:
        var_names += [ds_vars['weight_var']]
        var_type += ['NUMERIC']
    if extra_var is not None:
        var_names += extra_var_names
        var_type += extra_var_types

    # check whether splitting to training/testing data sets or just a single data set
    if (train_fraction is not None) and (train_fraction > 0.0):
        np.random.seed(seed=seed)
        idx_prob = np.random.uniform(low=0.0, high=1.0, size=(obs_idx,))

        num_train = 0
        num_test = 0
        for ii in range(obs_idx):
            if idx_prob[ii] < train_fraction:
                num_train += 1
            else:
                num_test += 1

        # split data to train/test data sets

        # token, position, segment
        train_token_strings = [None] * num_train
        train_position_strings = [None] * num_train
        if segment_vocab_size > 0:
            train_segment_strings = [None] * num_train
        #
        test_token_strings = [None] * num_test
        test_position_strings = [None] * num_test
        if segment_vocab_size > 0:
            test_segment_strings = [None] * num_test

        # target
        if target is not None:
            train_target_array = [None] * num_train
            train_tgtlen_array = [None] * num_train
            #
            test_target_array = [None] * num_test
            test_tgtlen_array = [None] * num_test

        # weight
        if obs_weight is not None:
            train_weight_array = [None] * num_train
            #
            test_weight_array = [None] * num_test

        # extra variable(s)
        if extra_var is not None:
            train_extra_var_array = [None] * num_train
            #
            test_extra_var_array = [None] * num_test

        train_idx = 0
        test_idx = 0
        for ii in range(obs_idx):
            if idx_prob[ii] < train_fraction:
                # train data set
                train_token_strings[train_idx] = token_strings[ii]
                train_position_strings[train_idx] = position_strings[ii]
                if segment_vocab_size > 0:
                    train_segment_strings[train_idx] = segment_strings[ii]
                # NOTE: each element of train target array may be a value or a list
                if target is not None:
                    train_target_array[train_idx] = target_array[ii]
                    train_tgtlen_array[train_idx] = tgtlen_array[ii]
                if obs_weight is not None:
                    train_weight_array[train_idx] = weight_array[ii]
                # NOTE: each element of train extra var array is a list
                if extra_var is not None:
                    train_extra_var_array[train_idx] = extra_var_array[ii]

                train_idx += 1
            else:
                # test data set
                test_token_strings[test_idx] = token_strings[ii]
                test_position_strings[test_idx] = position_strings[ii]
                if segment_vocab_size > 0:
                    test_segment_strings[test_idx] = segment_strings[ii]
                # NOTE: each element of test target array may be a value or a list
                if target is not None:
                    test_target_array[test_idx] = target_array[ii]
                    test_tgtlen_array[test_idx] = tgtlen_array[ii]
                if obs_weight is not None:
                    test_weight_array[test_idx] = weight_array[ii]
                # NOTE: each element of test extra var array is a list
                if extra_var is not None:
                    test_extra_var_array[test_idx] = extra_var_array[ii]

                test_idx += 1

        # create CAS table for training data
        train_data_set = 'bert_train_data'
        if segment_vocab_size > 0:
            dlist = [train_token_strings, train_position_strings, train_segment_strings]
        else:
            dlist = [train_token_strings, train_position_strings]
        if target is not None:
            if isinstance(train_target_array[0], list):
                for ii in range(len(train_target_array[0])):
                    tmp_array = [train_target_array[jj][ii] for jj in range(train_idx)]
                    dlist += [tmp_array]
            else:
                dlist += [train_target_array]
            dlist += [train_tgtlen_array]
        if obs_weight is not None:
            dlist += [train_weight_array]
        if extra_var is not None:
            for ii in range(len(train_extra_var_array[0])):
                tmp_array = [train_extra_var_array[jj][ii] for jj in range(train_idx)]
                dlist += [tmp_array]

        if verbose:
            print("NOTE: uploading training data to table " + train_data_set + ".")
            print("NOTE: there are " + str(num_train) +
                  " observations in the training data set.\n")

        handler1 = BertDMH(dlist, var_names, var_type)
        conn.retrieve('table.addtable', _messagelevel='error', table=train_data_set,
                      replace=True, **handler1.args.addtable)

        # create CAS table for test data
        test_data_set = 'bert_test_validation_data'
        if segment_vocab_size > 0:
            dlist = [test_token_strings, test_position_strings, test_segment_strings]
        else:
            dlist = [test_token_strings, test_position_strings]
        if target is not None:
            if isinstance(test_target_array[0], list):
                for ii in range(len(test_target_array[0])):
                    tmp_array = [test_target_array[jj][ii] for jj in range(test_idx)]
                    dlist += [tmp_array]
            else:
                dlist += [test_target_array]
            dlist += [test_tgtlen_array]
        if obs_weight is not None:
            dlist += [test_weight_array]
        if extra_var is not None:
            for ii in range(len(test_extra_var_array[0])):
                tmp_array = [test_extra_var_array[jj][ii] for jj in range(test_idx)]
                dlist += [tmp_array]

        if verbose:
            print("NOTE: uploading test/validation data to table " + test_data_set + ".")
            print("NOTE: there are " + str(num_test) +
                  " observations in the test/validation data set.\n")

        handler2 = BertDMH(dlist, var_names, var_type)
        conn.retrieve('table.addtable', _messagelevel='error', table=test_data_set,
                      replace=True, **handler2.args.addtable)

        if verbose:
            print("NOTE: training and test/validation data sets ready.\n")

        return num_target_var, train_data_set, test_data_set
    else:
        # single data set
        unified_data_set = 'bert_data'
        if segment_vocab_size > 0:
            dlist = [token_strings, position_strings, segment_strings]
        else:
            dlist = [token_strings, position_strings]
        if target is not None:
            if isinstance(target_array[0], list):
                for ii in range(len(target_array[0])):
                    tmp_array = [target_array[jj][ii] for jj in range(obs_idx)]
                    dlist += [tmp_array]
            else:
                dlist += [target_array]
            dlist += [tgtlen_array]
        if obs_weight is not None:
            dlist += [weight_array]
        if extra_var is not None:
            for ii in range(len(extra_var_array[0])):
                tmp_array = [extra_var_array[jj][ii] for jj in range(obs_idx)]
                dlist += [tmp_array]

        if verbose:
            print("NOTE: uploading data to table " + unified_data_set + ".")
            print("NOTE: there are " + str(obs_idx) + " observations in the data set.\n")

        handler = BertDMH(dlist, var_names, var_type)
        conn.retrieve('table.addtable', _messagelevel='error', table=unified_data_set,
                      replace=True, **handler.args.addtable)

        if verbose:
            print("NOTE: data set ready.\n")

        return num_target_var, unified_data_set
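# Example (illustrative sketch): preparing a small sentiment data set for a
# BERT model. The reviews/labels lists and tokenizer checkpoint are hypothetical.
#
#     from transformers import BertTokenizer
#     tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
#     reviews = ['i loved this movie', 'what a waste of time']
#     labels = [1, 0]
#     n_tgt, train_tbl, test_tbl = bert_prepare_data(
#         conn, tokenizer, max_seq_len=128, input_a=reviews, target=labels,
#         segment_vocab_size=2,        # 2 => BERT, 1 => RoBERTa, 0 => DistilBERT
#         train_fraction=0.8, verbose=True)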
def write_keras_hdf5(model, rnn_support, hdf5_out):
    '''
    Generate an HDF5 file with trained model parameters given a Keras definition

    Parameters
    ----------
    model : Keras model
        Keras deep learning model
    rnn_support : boolean
        Indicates whether importing RNN models is supported
    hdf5_out : string
        Fully qualified file name of SAS-compatible HDF5 file

    '''
    # open output file
    try:
        f_out = h5py.File(hdf5_out, 'w')
    except IOError:
        raise DLPyError('The specified file cannot be written: ' + hdf5_out)

    model_type = None
    use_gpu = None
    try:
        # determine model type
        # NOTE: must check ALL RNN layers to determine
        # whether model must run on GPU
        gpu_layers = []
        cpu_layers = []
        for layer in model.layers:
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                # check for RNN layers
                if class_name in rnn_layer_classes:
                    model_type = 'RNN'
                    image_data_format = None
                    if class_name in rnn_gpu_layer_classes:
                        gpu_layers.append(True)
                    elif class_name in rnn_cpu_layer_classes:
                        cpu_layers.append(True)

        # verify that model is supported by SAS Deep Learning
        if model_type == 'RNN':
            if rnn_support:
                if (len(gpu_layers) > 0) and (len(cpu_layers) == 0):
                    use_gpu = True
                elif (len(gpu_layers) == 0) and (len(cpu_layers) > 0):
                    use_gpu = False
                elif (len(gpu_layers) > 0) and (len(cpu_layers) > 0):
                    raise DLPyError('A mixture of CPU and GPU layers was detected. '
                                    'This is not supported by SAS Deep Learning.')
            else:
                raise DLPyError('RNN model detected: your Viya deployment does not support '
                                'importing an RNN model.')

        if model_type is None:
            found_cnn_layer = False
            for layer in model.layers:
                class_name, sublayers = remove_layer_wrapper(layer)
                for tlayer in sublayers:
                    # check for CNN layers
                    if class_name in conv_layer_classes:
                        model_type = 'CNN'
                        image_data_format = K.image_data_format()
                        found_cnn_layer = True

                if found_cnn_layer:
                    break

        if model_type is None:
            raise DLPyError('Only RNN and CNN models are currently supported.')

        # determine layers with weights
        filtered_layers = []
        filtered_layer_names = []
        for layer in model.layers:
            weights = layer.weights
            if weights:
                filtered_layers.append(layer)
                filtered_layer_names.append(layer.name)

        # determine permutation vector associated with flattening layer (if it exists)
        if model_type == 'CNN':
            flatten_layer_index = -1
            index = 0
            for layer in model.layers:
                if layer.__class__.__name__.lower() == 'flatten':
                    flatten_layer_index = index
                    break
                index = index + 1

            if flatten_layer_index != -1:
                layer = model.layers[flatten_layer_index]
                permute_layer_name = model.layers[flatten_layer_index + 1].name
                if image_data_format == 'channels_first':
                    C, H, W = (layer.input_shape)[1:]
                else:
                    H, W, C = (layer.input_shape)[1:]
                N = (layer.output_shape)[1]
                perm_index = [0] * N
                if image_data_format == 'channels_last':
                    ii = 0
                    for cc in range(C):
                        for hh in range(H):
                            for ww in range(W):
                                perm_index[ii] = hh * W * C + ww * C + cc
                                ii = ii + 1
                else:
                    for nn in range(N):
                        perm_index[nn] = nn
            else:
                perm_index = []
                permute_layer_name = None
        else:
            perm_index = []
            permute_layer_name = None

        # populate attributes with layer names
        attrib_layer_names = []
        for name in filtered_layer_names:
            layer = model.get_layer(name=name)
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                attrib_layer_names.append(tlayer.name)

        f_out.attrs['layer_names'] = [replace_forward_slash(l).encode('utf8')
                                      for l in attrib_layer_names]

        # let Keras read weights, reformat, and write to SAS-compatible file
        for k, layer in enumerate(filtered_layers):
            symbolic_weights = layer.weights
            weight_values = K.batch_get_value(symbolic_weights)
            weight_names = []
            for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
                if hasattr(w, 'name') and w.name:
                    name = str(w.name)
                else:
                    name = 'param_' + str(i)
                # weight_names.append(name.encode('utf8'))
                weight_names.append(name)

            # layer modification from here:
            new_weight_names = []
            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) + ' (named "' + layer.name +
                                 '" in the current model) was found to '
                                 'correspond to layer ' + name + ' in the saved file. '
                                 'However the new layer ' + layer.name + ' expects ' +
                                 str(len(symbolic_weights)) +
                                 ' weights, but the saved weights have ' +
                                 str(len(weight_values)) + ' elements.')

            # create CPU-compatible layer
            cpu_layer = create_cpu_compatible_layer(layer, model_type)

            # read/write weights
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                g_out = f_out.create_group(replace_forward_slash(tlayer.name))
                new_weight_names = []
                wgt_idx = 0
                for ii, wgt_name in enumerate(weight_names):
                    if tlayer.name in wgt_name:
                        if type(weight_values[ii]) == np.ndarray:
                            tensor_in = weight_values[ii]
                        else:
                            tensor_in = np.zeros(weight_values[ii].shape,
                                                 dtype=weight_values[ii].dtype)
                            weight_values[ii].read_direct(tensor_in)

                        # permute axes as needed to conform to SAS deep
                        # learning "channels first" format
                        if (image_data_format is not None) and (image_data_format == 'channels_first'):
                            # format: (C,fdim1,fdim2,fdim3) ==> (C,fdim3,fdim1,fdim2)
                            if len(tensor_in.shape) == 4:
                                tensor_out = np.transpose(tensor_in, (0, 3, 1, 2))
                            else:
                                tensor_out = tensor_in.copy()
                        else:
                            # "channels last" format or not image processing problem
                            # process RNN layers first
                            if class_name in rnn_layer_classes:
                                cpu_class_name, cpu_sublayers = remove_layer_wrapper(cpu_layer)
                                if (len(tensor_in.shape) == 1) and (class_name != cpu_class_name):
                                    tensor_out = np.tile(0.5 * tensor_in, 2)
                                else:
                                    tensor_out = tensor_in.copy()
                            # not an RNN layer, but this is a vector - nothing to permute
                            elif len(tensor_in.shape) == 1:
                                tensor_out = tensor_in.copy()
                            else:
                                # permute Conv2D tensor to "channels_first" format
                                if class_name == 'conv2d':
                                    tensor_out = np.transpose(tensor_in, (3, 2, 0, 1))
                                # have to account for neuron ordering in first dense
                                # layer following flattening operation
                                elif class_name == 'dense':
                                    if (permute_layer_name is not None) and (tlayer.name == permute_layer_name):
                                        tensor_out = np.zeros(tensor_in.shape)
                                        for jj in range(tensor_out.shape[0]):
                                            tensor_out[jj, :] = tensor_in[perm_index[jj], :]
                                    else:
                                        # not following flattening, just copy
                                        tensor_out = tensor_in.copy()

                                    # mimic Caffe layout
                                    tensor_out = np.transpose(tensor_out, (1, 0))

                        # save weight in format amenable to SAS
                        dset_name = generate_dataset_name(tlayer, wgt_idx)
                        wgt_idx = wgt_idx + 1
                        new_weight_names.append(dset_name)
                        g_out.create_dataset(dset_name, data=tensor_out)

                # update weight names
                g_out.attrs['weight_names'] = new_weight_names

    except ValueError as err_msg:
        print(err_msg)
    finally:
        # close files
        f_out.close()

    return use_gpu
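# For intuition, the flattening permutation above maps the "channels_last"
# flattened neuron order (H, W, C) onto the "channels_first" order (C, H, W)
# that SAS Deep Learning expects. A minimal sketch with toy dimensions:
#
#     C, H, W = 2, 2, 2
#     perm_index = [hh * W * C + ww * C + cc
#                   for cc in range(C) for hh in range(H) for ww in range(W)]
#     # perm_index == [0, 2, 4, 6, 1, 3, 5, 7]:
#     # all channel-0 positions first, then all channel-1 positions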
def write_keras_hdf5_from_file(model, rnn_support, hdf5_in, hdf5_out):
    '''
    Generate an HDF5 file with trained model parameters given a Keras definition

    Parameters
    ----------
    model : Keras model
        Keras deep learning model
    rnn_support : boolean
        Indicates whether importing RNN models is supported
    hdf5_in : string
        Fully qualified file name of Keras HDF5 file
    hdf5_out : string
        Fully qualified file name of SAS-compatible HDF5 file

    '''
    # open input/output files
    if os.path.isfile(hdf5_in):
        f_in = h5py.File(hdf5_in, 'r')
        try:
            f_out = h5py.File(hdf5_out, 'w')
        except IOError:
            raise DLPyError('The specified file cannot be written: ' + hdf5_out)
    else:
        raise DLPyError('The specified file does not exist: ' + hdf5_in)

    if 'keras_version' in f_in.attrs:
        original_keras_version = f_in.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f_in.attrs:
        original_backend = f_in.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    model_type = None
    use_gpu = None
    try:
        # determine model type
        # NOTE: must check ALL RNN layers to determine
        # whether model must run on GPU
        gpu_layers = []
        cpu_layers = []
        for layer in model.layers:
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                # check for RNN layers
                if class_name in rnn_layer_classes:
                    model_type = 'RNN'
                    image_data_format = None
                    if class_name in rnn_gpu_layer_classes:
                        gpu_layers.append(True)
                    elif class_name in rnn_cpu_layer_classes:
                        cpu_layers.append(True)

        # verify that model is supported by SAS Deep Learning
        if model_type == 'RNN':
            if rnn_support:
                if (len(gpu_layers) > 0) and (len(cpu_layers) == 0):
                    use_gpu = True
                elif (len(gpu_layers) == 0) and (len(cpu_layers) > 0):
                    use_gpu = False
                elif (len(gpu_layers) > 0) and (len(cpu_layers) > 0):
                    raise DLPyError('A mixture of CPU and GPU layers was detected. '
                                    'This is not supported by SAS Deep Learning.')
            else:
                raise DLPyError('RNN model detected: your Viya deployment does not support '
                                'importing an RNN model.')

        if model_type is None:
            found_cnn_layer = False
            for layer in model.layers:
                class_name, sublayers = remove_layer_wrapper(layer)
                for tlayer in sublayers:
                    # check for CNN layers
                    if class_name in conv_layer_classes:
                        model_type = 'CNN'
                        image_data_format = K.image_data_format()
                        found_cnn_layer = True

                if found_cnn_layer:
                    break

        if model_type is None:
            raise DLPyError('Only RNN and CNN models are currently supported.')

        # navigate to correct HDF5 group
        if 'layer_names' in f_in.attrs.keys():
            root_group = f_in
        elif 'layer_names' in f_in['model_weights'].attrs.keys():
            root_group = f_in['model_weights']
        else:
            raise DLPyError('Cannot read HDF5 file correctly')

        # determine layers with weights
        filtered_layers = []
        for layer in model.layers:
            weights = layer.weights
            if weights:
                filtered_layers.append(layer)

        layer_names = [n.decode('utf8') for n in root_group.attrs['layer_names']]
        filtered_layer_names = []
        for name in layer_names:
            g = root_group[name]
            weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
            if weight_names:
                filtered_layer_names.append(name)
        layer_names = filtered_layer_names
        if len(layer_names) != len(filtered_layers):
            raise ValueError('You are trying to load a weight file '
                             'containing ' + str(len(layer_names)) +
                             ' layers into a model with ' +
                             str(len(filtered_layers)) + ' layers.')

        # determine permutation vector associated with flattening layer (if it exists)
        if model_type == 'CNN':
            flatten_layer_index = -1
            index = 0
            for layer in model.layers:
                if layer.__class__.__name__.lower() == 'flatten':
                    flatten_layer_index = index
                    break
                index = index + 1

            if flatten_layer_index != -1:
                layer = model.layers[flatten_layer_index]
                permute_layer_name = model.layers[flatten_layer_index + 1].name
                if image_data_format == 'channels_first':
                    C, H, W = (layer.input_shape)[1:]
                else:
                    H, W, C = (layer.input_shape)[1:]
                N = (layer.output_shape)[1]
                perm_index = [0] * N
                if image_data_format == 'channels_last':
                    ii = 0
                    for cc in range(C):
                        for hh in range(H):
                            for ww in range(W):
                                perm_index[ii] = hh * W * C + ww * C + cc
                                ii = ii + 1
                else:
                    for nn in range(N):
                        perm_index[nn] = nn
            else:
                perm_index = []
                permute_layer_name = None
        else:
            perm_index = []
            permute_layer_name = None

        # populate attributes with layer names
        attrib_layer_names = []
        for name in layer_names:
            layer = model.get_layer(name=name)
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                attrib_layer_names.append(tlayer.name)

        f_out.attrs['layer_names'] = [replace_forward_slash(l).encode('utf8')
                                      for l in attrib_layer_names]

        # let Keras read weights, reformat, and write to SAS-compatible file
        for k, name in enumerate(layer_names):
            g_in = root_group[name]
            layer = filtered_layers[k]

            weight_names = [n.decode('utf8') for n in g_in.attrs['weight_names']]
            weight_values = [g_in[weight_name] for weight_name in weight_names]
            symbolic_weights = layer.weights

            # create CPU-compatible layer
            cpu_layer = create_cpu_compatible_layer(layer, model_type)

            # use Keras to load/preprocess weights
            weight_values = preprocess_weights_for_loading(cpu_layer, weight_values,
                                                           original_keras_version,
                                                           original_backend)

            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) + ' (named "' + layer.name +
                                 '" in the current model) was found to '
                                 'correspond to layer ' + name + ' in the saved file. '
                                 'However the new layer ' + layer.name + ' expects ' +
                                 str(len(symbolic_weights)) +
                                 ' weights, but the saved weights have ' +
                                 str(len(weight_values)) + ' elements.')

            if layer.__class__.__name__.lower() == 'batchnormalization':
                bn_gamma = np.ones(weight_values[0].shape, dtype=weight_values[0].dtype)
                bn_beta = np.zeros(weight_values[0].shape, dtype=weight_values[0].dtype)
                layer_config = layer.get_config()
                # if scale = False and center = True
                if not layer_config['scale'] and layer_config['center']:
                    weight_values.insert(0, bn_gamma)
                    weight_names.insert(0, replace_forward_slash(layer.name) + '/' + 'gamma:0')
                # if scale = True and center = False
                elif layer_config['scale'] and not layer_config['center']:
                    weight_values.insert(1, bn_beta)
                    weight_names.insert(1, replace_forward_slash(layer.name) + '/' + 'beta:0')
                # if scale = False and center = False
                elif not layer_config['scale'] and not layer_config['center']:
                    weight_values = [bn_gamma, bn_beta] + weight_values
                    weight_names = [replace_forward_slash(layer.name) + '/' + 'gamma:0',
                                    replace_forward_slash(layer.name) + '/' + 'beta:0'] + weight_names

                # add epsilon to variance values to avoid divide by zero
                if 'epsilon' in layer_config.keys():
                    for ii, wgt_name in enumerate(weight_names):
                        if 'moving_variance' in wgt_name:
                            weight_values[ii] = weight_values[ii] + \
                                (layer_config['epsilon'] *
                                 np.ones(weight_values[ii].shape,
                                         dtype=weight_values[ii].dtype))

            # read/write weights
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                g_out = f_out.create_group(replace_forward_slash(tlayer.name))
                new_weight_names = []
                wgt_idx = 0
                for ii, wgt_name in enumerate(weight_names):
                    if tlayer.name in wgt_name:
                        if type(weight_values[ii]) == np.ndarray:
                            tensor_in = weight_values[ii]
                        else:
                            tensor_in = np.zeros(weight_values[ii].shape,
                                                 dtype=weight_values[ii].dtype)
                            weight_values[ii].read_direct(tensor_in)

                        # permute axes as needed to conform to SAS deep
                        # learning "channels first" format
                        if (image_data_format is not None) and (image_data_format == 'channels_first'):
                            # format: (C,fdim1,fdim2,fdim3) ==> (C,fdim3,fdim1,fdim2)
                            if len(tensor_in.shape) == 4:
                                tensor_out = np.transpose(tensor_in, (0, 3, 1, 2))
                            else:
                                tensor_out = tensor_in.copy()
                        else:
                            # "channels last" format or not image processing problem
                            # process RNN layers first
                            if class_name in rnn_layer_classes:
                                cpu_class_name, cpu_sublayers = remove_layer_wrapper(cpu_layer)
                                if (len(tensor_in.shape) == 1) and (class_name != cpu_class_name):
                                    tensor_out = np.tile(0.5 * tensor_in, 2)
                                else:
                                    tensor_out = tensor_in.copy()
                            # not an RNN layer, but this is a vector - nothing to permute
                            elif len(tensor_in.shape) == 1:
                                tensor_out = tensor_in.copy()
                            else:
                                # permute Conv2D tensor to "channels_first" format
                                if class_name == 'conv2d':
                                    tensor_out = np.transpose(tensor_in, (3, 2, 0, 1))
                                # have to account for neuron ordering in first dense
                                # layer following flattening operation
                                elif class_name == 'dense':
                                    if (permute_layer_name is not None) and (tlayer.name == permute_layer_name):
                                        tensor_out = np.zeros(tensor_in.shape)
                                        for jj in range(tensor_out.shape[0]):
                                            tensor_out[jj, :] = tensor_in[perm_index[jj], :]
                                    else:
                                        # not following flattening, just copy
                                        tensor_out = tensor_in.copy()

                                    # mimic Caffe layout
                                    tensor_out = np.transpose(tensor_out, (1, 0))

                        # save weight in format amenable to SAS
                        dset_name = generate_dataset_name(tlayer, wgt_idx)
                        wgt_idx = wgt_idx + 1
                        new_weight_names.append(dset_name)
                        g_out.create_dataset(dset_name, data=tensor_out)

                # update weight names
                g_out.attrs['weight_names'] = new_weight_names

    except ValueError as err_msg:
        print(err_msg)
    finally:
        # close files
        f_out.close()
        f_in.close()

    return use_gpu
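# Example (illustrative sketch): converting weights saved by Keras into the
# SAS-compatible layout. The model object and file names are hypothetical.
#
#     use_gpu = write_keras_hdf5_from_file(kmodel, rnn_support=True,
#                                          hdf5_in='/tmp/keras_weights.h5',
#                                          hdf5_out='/tmp/sas_weights.h5')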
def build_embedding_model(cls, branch, model_table=None, embedding_model_type='Siamese',
                          embedding_layer=None, margin=None):
    '''
    Build an embedding model based on a given model branch and model type

    Parameters
    ----------
    branch : Model
        Specifies the base model that is used as branches for embedding model.
    model_table : string or dict or CAS table, optional
        Specifies the CAS table to store the deep learning model.
        Default: None
    embedding_model_type : string, optional
        Specifies the embedding model type that the created table will be
        applied for training.
        Valid values: Siamese, Triplet, and Quartet.
        Default: Siamese
    embedding_layer : Layer, optional
        Specifies a dense layer as the embedding layer. For instance,
        Dense(n=10, act='identity') defines an embedding dimension of 10.
        When it is not given, the last layer (except the task layers)
        in the branch model will be used as the embedding layer.
    margin : double, optional
        Specifies the margin value used by the embedding model. When it is
        not given, for Siamese, margin is 2.0. Otherwise, margin is 0.0.

    Returns
    -------
    :class:`Model`

    '''
    # check the branch type
    if not isinstance(branch, Model):
        raise DLPyError('The branch option must contain a valid model')

    # the branch must be built using functional APIs
    # only functional model has the attr output_layers
    if not hasattr(branch, 'output_layers'):
        print("NOTE: Convert the branch model into a functional model.")
        branch_tensor = branch.to_functional_model()
    else:
        branch_tensor = deepcopy(branch)

    # always reset this local tensor to 0
    branch_tensor.number_of_instances = 0

    # the branch cannot contain other task layers
    if len(branch_tensor.output_layers) != 1:
        raise DLPyError('The branch model cannot contain more than one output layer')
    elif branch_tensor.output_layers[0].type == OutputLayer.type or \
            branch_tensor.output_layers[0].type == Keypoints.type:
        print("NOTE: Remove the task layers from the model.")
        branch_tensor.layers.remove(branch_tensor.output_layers[0])
        branch_tensor.output_layers[0] = branch_tensor.layers[-1]
    elif branch_tensor.output_layers[0].can_be_last_layer:
        raise DLPyError('The branch model cannot contain task layers except '
                        'output or keypoints layers.')

    # check embedding_model_type
    if embedding_model_type.lower() not in ['siamese', 'triplet', 'quartet']:
        raise DLPyError('Only Siamese, Triplet, and Quartet are valid.')

    if embedding_model_type.lower() == 'siamese':
        if margin is None:
            margin = 2.0
        cls.number_of_branches = 2
    elif embedding_model_type.lower() == 'triplet':
        if margin is None:
            margin = 0.0
        cls.number_of_branches = 3
    elif embedding_model_type.lower() == 'quartet':
        if margin is None:
            margin = 0.0
        cls.number_of_branches = 4

    cls.embedding_model_type = embedding_model_type

    # build the branches
    input_layers = []
    branch_layers = []
    for i_branch in range(cls.number_of_branches):
        temp_input_layer = Input(**branch_tensor.layers[0].config,
                                 name=cls.input_layer_name_prefix + str(i_branch))
        temp_branch = branch_tensor(temp_input_layer)  # returns a list of tensors
        if embedding_layer:
            temp_embed_layer = deepcopy(embedding_layer)
            temp_embed_layer.name = cls.embedding_layer_name_prefix + str(i_branch)
            temp_branch = temp_embed_layer(temp_branch)
            # change tensor to a list
            temp_branch = [temp_branch]
        else:
            # change the last layer name to the embedding layer name
            temp_branch[-1]._op.name = cls.embedding_layer_name_prefix + str(i_branch)

        if i_branch == 0:
            cls.branch_input_tensor = temp_input_layer
            if len(temp_branch) == 1:
                cls.branch_output_tensor = temp_branch[0]
            else:
                cls.branch_output_tensor = temp_branch
        # append these layers to the current branch
        input_layers.append(temp_input_layer)
        branch_layers = branch_layers + temp_branch

    # add the embedding loss layer
    loss_layer = EmbeddingLoss(margin=margin,
                               name=cls.embedding_loss_layer_name)(branch_layers)

    # create the model DAG using all the above model information
    model = EmbeddingModel(branch.conn, model_table=model_table,
                           inputs=input_layers, outputs=loss_layer)

    # sharing weights
    # get all layer names from one branch
    num_l = int((len(model.layers) - 1) / cls.number_of_branches)
    br1_name = [i.name for i in model.layers[:num_l - 1]]

    # build the list that contains the shared layers
    share_list = []
    n_id = 0
    n_to = n_id + cls.number_of_branches
    for l in br1_name[1:]:
        share_list.append({l: [l + '_' + str(i + 1) for i in range(n_id + 1, n_to)]})

    # add embedding layers
    share_list.append({cls.embedding_layer_name_prefix + str(0):
                       [cls.embedding_layer_name_prefix + str(i)
                        for i in range(1, cls.number_of_branches)]})

    model.share_weights(share_list)
    model.compile()

    # generate data_specs
    if embedding_model_type.lower() == 'siamese':
        cls.data_specs = [DataSpec(type_='image', layer=cls.input_layer_name_prefix + '0',
                                   data=['_image_']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '1',
                                   data=['_image_1']),
                          DataSpec(type_='numnom', layer=cls.embedding_loss_layer_name,
                                   data=['_dissimilar_'])]
    elif embedding_model_type.lower() == 'triplet':
        cls.data_specs = [DataSpec(type_='image', layer=cls.input_layer_name_prefix + '0',
                                   data=['_image_']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '1',
                                   data=['_image_1']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '2',
                                   data=['_image_2'])]
    elif embedding_model_type.lower() == 'quartet':
        cls.data_specs = [DataSpec(type_='image', layer=cls.input_layer_name_prefix + '0',
                                   data=['_image_']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '1',
                                   data=['_image_1']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '2',
                                   data=['_image_2']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '3',
                                   data=['_image_3'])]

    return model
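# Example (illustrative sketch): building a triplet embedding model from a
# ResNet18 branch, assuming this function is exposed as the classmethod
# EmbeddingModel.build_embedding_model. The table names are hypothetical.
#
#     from dlpy.applications import ResNet18_Caffe
#     from dlpy.layers import Dense
#     branch = ResNet18_Caffe(conn, width=224, height=224)
#     emb_model = EmbeddingModel.build_embedding_model(
#         branch, model_table='triplet_model', embedding_model_type='Triplet',
#         embedding_layer=Dense(n=128, act='identity'))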
def deploy_embedding_model(self, path, output_format='astore', model_type='branch'):
    """
    Deploy the deep learning model to a data file

    Parameters
    ----------
    path : string
        Specifies the location to store the model files.
        If the output_format is set to castable, then the location has to be
        on the server-side. Otherwise, the location has to be on the client-side.
    output_format : string, optional
        Specifies the format of the deployed model.
        When astore is specified, the learned embedding features will be output as well.
        Valid Values: astore, castable, or onnx
        Default: astore
    model_type : string, optional
        Specifies how to deploy the embedding model. "branch" means only one branch
        model is deployed to extract features, while "full" means the full model is
        deployed to extract features for all branches and compute the distance metric
        values for all input data pairs.
        Valid values: branch and full
        Default: branch

    Notes
    -----
    Currently, this function supports sashdat, astore, and onnx formats.

    More information about ONNX can be found at: https://onnx.ai/

    DLPy supports ONNX version >= 1.3.0, and Opset version 8.

    For ONNX format, currently supported layers are convo, pool, fc, batchnorm,
    residual, concat, reshape, and detection.

    If dropout is specified in the model, train the model using inverted dropout,
    which can be specified in :class:`Optimizer`. This will ensure the results are
    correct when running the model during test phase.

    Returns
    -------
    :class:`Model` for a branch model when model_type is 'branch'

    """
    if model_type.lower() not in ['branch', 'full']:
        raise DLPyError('Only branch and full are valid.')

    if model_type.lower() == 'full':
        temp_embed_layers = []
        for i_branch in range(self.number_of_branches):
            temp_embed_layers.append(self.embedding_layer_name_prefix + str(i_branch))
        self.deploy(path=path, output_format=output_format, layers=temp_embed_layers)
    else:
        # create a fake task layer
        fake_output_layer = OutputLayer(n=1, name='Output1')(self.branch_output_tensor)
        # build the branch model from the tensor
        branch_model = Model(self.conn, inputs=self.branch_input_tensor,
                             outputs=fake_output_layer,
                             model_table=self.model_name + '_branch')
        branch_model.compile()

        # attach weights
        weight_tbl = WeightsTable(self.conn, self.model_weights.name, self.model_name)
        branch_model.set_weights(weight_tbl)

        # inherit the weight attr from the full model
        self.conn.retrieve('table.attribute', _messagelevel='error',
                           name=self.model_weights.name, task='CONVERT',
                           attrtable=self.model_weights.name + '_attr')
        self.conn.retrieve('table.attribute', _messagelevel='error',
                           name=branch_model.model_weights.name, task='ADD',
                           attrtable=self.model_weights.name + '_attr')
        self.conn.retrieve('table.dropTable', _messagelevel='error',
                           table=self.model_weights.name + '_attr')

        # add model attrs
        data_specs = [DataSpec(type_='IMAGE', layer=self.input_layer_name_prefix + '0',
                               data=['_image_']),
                      DataSpec(type_='NUMNOM', layer='Output1', data=['_fake_output_'],
                               nominals=['_fake_output_'])]
        create_extended_attributes(branch_model.conn, branch_model.model_name,
                                   branch_model.layers, data_specs)

        # deploy it
        temp_embed_layer = self.embedding_layer_name_prefix + '0'
        branch_model.deploy(path=path, output_format=output_format,
                            layers=temp_embed_layer)

        return branch_model
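# Example (illustrative sketch): deploying only the shared branch as an astore
# for feature extraction, continuing the hypothetical emb_model from above.
# The output path is a placeholder.
#
#     branch_model = emb_model.deploy_embedding_model('/models/embed',
#                                                     output_format='astore',
#                                                     model_type='branch')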
def fit_embedding_model(self, optimizer, data=None, path=None, n_samples=512, label_level=-2, resize_width=None, resize_height=None, max_iter=1, valid_table=None, valid_freq=1, gpu=None, seed=0, record_seed=0, save_best_weights=False, n_threads=None, train_from_scratch=None): """ Fitting a deep learning model for embedding learning. Parameters ---------- optimizer : :class:`Optimizer` Specifies the parameters for the optimizer. data : :class:`ImageEmbeddingTable`, optional This is the input data. It must be an ImageEmbeddingTable object. Either data or path has to be specified. path : string, optional The path to the image directory on the server. Path may be absolute, or relative to the current caslib root. When path is specified, the data option will be ignored. A new sample of data will be randomly generated after the number of epochs defined in Optimizer. max_iter defines how many times the random sample will be generated. n_samples : int, optional Number of samples to generate. Default: 512 label_level : int, optional Specifies which path level should be used to generate the class labels for each image. This class label determines whether a given image pair belongs to the same class. For instance, label_level = 1 means the first directory and label_level = -2 means the last directory. This internally uses the SAS scan function (check https://www.sascrunch.com/scan-function.html for more details). Default: -2 resize_width : int, optional Specifies the width that the images need to be resized to. When resize_width is not given, it will be reset to the specified resize_height. resize_height : int, optional Specifies the height that the images need to be resized to. When resize_height is not given, it will be reset to the specified resize_width. max_iter : int, optional Hard limit on iterations when randomly generating data. Default: 1 valid_table : string or CASTable, optional Specifies the table with the validation data. The validation table must have the same columns and data types as the training table. valid_freq : int, optional Specifies the frequency for scoring the validation table. gpu : :class:`Gpu`, optional When specified, the action uses graphical processing unit hardware. The simplest way to use GPU processing is to specify "gpu=1". In this case, the default values of other GPU parameters are used. Setting gpu=1 enables all available GPU devices for use. Setting gpu=0 disables GPU processing. seed : double, optional Specifies the random number seed for the random number generator in SGD. The default value, 0, and negative values indicate to use random number streams based on the computer clock. Specify a value that is greater than 0 for a reproducible random number sequence. record_seed : double, optional Specifies the random number seed for the random record selection within a worker. The default value 0 disables random record selection. Records are read as they are laid out in memory. Negative values indicate to use random number streams based on the computer clock. save_best_weights : bool, optional When set to True, it keeps the weights that provide the smallest loss error. n_threads : int, optional Specifies the number of threads to use. If nothing is set then all of the cores available in the machine(s) will be used. train_from_scratch : bool, optional When set to True, it ignores the existing weights and trains the model from scratch.
Returns -------- :class:`CASResults` or a list of `CASResults` when the path option is specified """ # check options if data is None and path is None: raise DLPyError( 'Either the data option or path must be specified to generate the input data' ) if data is not None and path is not None: print( 'Note: the data option will be ignored and the path option will be used to generate the input ' 'data') # check the data type if path is None: if not isinstance(data, ImageEmbeddingTable): raise DLPyError( 'The data option must contain a valid embedding table') if data.embedding_model_type.lower() != self.embedding_model_type.lower(): raise DLPyError( 'The data option must contain a valid embedding table for ' + self.embedding_model_type) # use the data option to train a model if path is None: res = self.fit(data, inputs=None, target=None, data_specs=self.data_specs, optimizer=optimizer, valid_table=valid_table, valid_freq=valid_freq, gpu=gpu, seed=seed, record_seed=record_seed, force_equal_padding=True, save_best_weights=save_best_weights, n_threads=n_threads, target_order=None, train_from_scratch=train_from_scratch) else: # use the path option to generate the input data import time res = [] time_start = time.time() for data_iter in range(0, max_iter): # generate a new data table time_0 = time.time() data = ImageEmbeddingTable.load_files( self.conn, path=path, n_samples=n_samples, label_level=label_level, embedding_model_type=self.embedding_model_type, resize_width=resize_width, resize_height=resize_height) print( 'Note: data generation took {} (s) at iteration {}'.format( time.time() - time_0, data_iter)) # train the model using this data if data_iter == 0: train_from_scratch_real = train_from_scratch else: train_from_scratch_real = False res_t = self.fit(data, inputs=None, target=None, data_specs=self.data_specs, optimizer=optimizer, valid_table=valid_table, valid_freq=valid_freq, gpu=gpu, seed=seed, record_seed=record_seed, force_equal_padding=True, save_best_weights=save_best_weights, n_threads=n_threads, target_order=None, train_from_scratch=train_from_scratch_real) res.append(res_t) # drop this data data.droptable() print('Note: Training with data generation took {} (s)'.format( time.time() - time_start)) return res
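# --- Usage sketch for the path-based training loop above: a fresh random ImageEmbeddingTable
# sample is drawn for each of the max_iter outer iterations. The server-side image directory
# and the optimizer settings are placeholders, not recommendations.
from dlpy.model import Optimizer, MomentumSolver

optimizer = Optimizer(algorithm=MomentumSolver(learning_rate=0.0001),
                      mini_batch_size=8, max_epochs=5)
res_list = model.fit_embedding_model(optimizer, path='/data/faces',
                                     n_samples=512, max_iter=3, seed=1234)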
def YoloV2(conn, anchors, model_table='YoloV2', n_channels=3, width=416, height=416, scale=1.0 / 255, random_mutation=None, act='leaky', act_detection='AUTO', softmax_for_class_prob=True, coord_type='YOLO', max_label_per_image=30, max_boxes=30, n_classes=20, predictions_per_grid=5, do_sqrt=True, grid_number=13, coord_scale=None, object_scale=None, prediction_not_a_object_scale=None, class_scale=None, detection_threshold=None, iou_threshold=None, random_boxes=False, match_anchor_size=None, num_to_force_coord=None, random_flip=None, random_crop=None): ''' Generates a deep learning model with the Yolov2 architecture. Parameters ---------- conn : CAS Specifies the CAS connection object. anchors : list Specifies the anchor box values. model_table : string, optional Specifies the name of CAS table to store the model. n_channels : int, optional Specifies the number of the channels (i.e., depth) of the input layer. Default: 3 width : int, optional Specifies the width of the input layer. Default: 416 height : int, optional Specifies the height of the input layer. Default: 416 scale : double, optional Specifies a scaling factor to be applied to each pixel intensity value. Default: 1.0 / 255 random_mutation : string, optional Specifies how to apply data augmentations/mutations to the data in the input layer. Valid Values: 'none', 'random' act : string, optional Specifies the activation function for the batch normalization layers. Default: 'leaky' act_detection : string, optional Specifies the activation function for the detection layer. Valid Values: AUTO, IDENTITY, LOGISTIC, SIGMOID, TANH, RECTIFIER, RELU, SOFTPLUS, ELU, LEAKY, FCMP Default: AUTO softmax_for_class_prob : bool, optional Specifies whether to perform Softmax on class probability per predicted object. Default: True coord_type : string, optional Specifies the format of how to represent bounding boxes. For example, a bounding box can be represented with the x and y locations of the top-left point as well as width and height of the rectangle. This format is the 'rect' format. We also support coco and yolo formats. Valid Values: 'rect', 'yolo', 'coco' Default: 'yolo' max_label_per_image : int, optional Specifies the maximum number of labels per image in the training. Default: 30 max_boxes : int, optional Specifies the maximum number of overall predictions allowed in the detection layer. Default: 30 n_classes : int, optional Specifies the number of classes. If None is assigned, the model will automatically detect the number of classes based on the training set. Default: 20 predictions_per_grid : int, optional Specifies the number of predictions to be made per grid cell. Default: 5 do_sqrt : bool, optional Specifies whether to apply the SQRT function to width and height of the object for the cost function. Default: True grid_number : int, optional Specifies the number of cells to be analyzed for an image. For example, if the value is 5, then the image will be divided into a 5 x 5 grid. Default: 13 coord_scale : float, optional Specifies the weight for the cost function in the detection layer, when objects exist in the grid. object_scale : float, optional Specifies the weight for detected objects in the cost function in the detection layer. prediction_not_a_object_scale : float, optional Specifies the weight for the cost function in the detection layer, when objects do not exist in the grid. class_scale : float, optional Specifies the weight for the class of object detected for the cost function in the detection layer.
detection_threshold : float, optional Specifies the threshold for object detection. iou_threshold : float, optional Specifies the IOU threshold for non-maximum suppression in object detection. random_boxes : bool, optional Specifies whether to randomize boxes when loading the bounding box information. Default: False match_anchor_size : bool, optional Specifies whether to force the predicted boxes to match the anchor box sizes for all predictions num_to_force_coord : int, optional Specifies the number of leading images in training for which the algorithm forces the predicted objects in each grid to be equal to the anchor box sizes and located at the grid center random_flip : string, optional Specifies how to flip the data in the input layer when image data is used. Approximately half of the input data is subject to flipping. Valid Values: 'h', 'hv', 'v', 'none' random_crop : string, optional Specifies how to crop the data in the input layer when image data is used. Images are cropped to the values that are specified in the width and height parameters. Only the images with one or both dimensions that are larger than those sizes are cropped. Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop' Returns ------- :class:`Sequential` References ---------- https://arxiv.org/pdf/1612.08242.pdf ''' if len(anchors) != 2 * predictions_per_grid: raise DLPyError( 'The size of the anchor list in the detection layer for YOLOv2 should be equal to ' 'twice the number of predictions_per_grid.') model = Sequential(conn=conn, model_table=model_table) parameters = locals() input_parameters = get_layer_options(input_layer_options, parameters) if input_parameters['width'] != input_parameters['height']: print( not_supported_feature('Non-square yolo model training', 'height=width')) input_parameters['height'] = input_parameters['width'] model.add(InputLayer(**input_parameters)) # conv1 224 416 model.add(Conv2d(32, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) model.add(Pooling(width=2, height=2, stride=2, pool='max')) # conv2 112 208 model.add(Conv2d(64, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) model.add(Pooling(width=2, height=2, stride=2, pool='max')) # conv3 56 104 model.add( Conv2d(128, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv4 56 104 model.add(Conv2d(64, width=1, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv5 56 104 model.add( Conv2d(128, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) model.add(Pooling(width=2, height=2, stride=2, pool='max')) # conv6 28 52 model.add( Conv2d(256, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv7 28 52 model.add( Conv2d(128, width=1, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv8 28 52 model.add( Conv2d(256, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) model.add(Pooling(width=2, height=2, stride=2, pool='max')) # conv9 14 26 model.add( Conv2d(512, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv10 14 26 model.add( Conv2d(256, width=1, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv11 14 26 model.add( Conv2d(512, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv12 14 26 model.add( Conv2d(256, width=1, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv13 14 26 model.add( Conv2d(512, width=3, act='identity',
include_bias=False, stride=1)) model.add(BN(act=act)) model.add(Pooling(width=2, height=2, stride=2, pool='max')) # conv14 7 13 model.add( Conv2d(1024, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv15 7 13 model.add( Conv2d(512, width=1, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv16 7 13 model.add( Conv2d(1024, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv17 7 13 model.add( Conv2d(512, width=1, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) # conv18 7 13 model.add( Conv2d(1024, width=3, act='identity', include_bias=False, stride=1)) model.add(BN(act=act)) model.add( Conv2d((n_classes + 5) * predictions_per_grid, width=1, act='identity', include_bias=False, stride=1)) model.add( Detection(act=act_detection, detection_model_type='yolov2', anchors=anchors, softmax_for_class_prob=softmax_for_class_prob, coord_type=coord_type, class_number=n_classes, grid_number=grid_number, predictions_per_grid=predictions_per_grid, do_sqrt=do_sqrt, coord_scale=coord_scale, object_scale=object_scale, prediction_not_a_object_scale=prediction_not_a_object_scale, class_scale=class_scale, detection_threshold=detection_threshold, iou_threshold=iou_threshold, random_boxes=random_boxes, max_label_per_image=max_label_per_image, max_boxes=max_boxes, match_anchor_size=match_anchor_size, num_to_force_coord=num_to_force_coord)) return model
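# --- Usage sketch: the anchors list must hold 2 * predictions_per_grid width/height pairs
# measured in grid-cell units. These particular anchor values are illustrative only;
# `s` is an existing swat.CAS session as in the earlier sketches.
anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]
yolo_model = YoloV2(s, anchors, model_table='YoloV2', n_classes=20,
                    predictions_per_grid=5, grid_number=13, coord_type='YOLO')
yolo_model.print_summary()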
def remove_layer_wrapper(layer): ''' Determines underlying layer type for wrapped layers Parameters ---------- layer : Layer object Current layer object Returns ------- string class name of wrapped layer list of layer objects unwrapped layer object(s) ''' class_name = layer.__class__.__name__.lower() # check for layer wrappers sublayers = [] if class_name == 'timedistributed': layer_info = layer.get_config()['layer'] layer_info['config']['name'] = layer.name class_name = layer_info['class_name'].lower() if class_name == 'dense': sublayers.append(keras.layers.Dense(**layer_info['config'])) else: raise DLPyError(class_name + ' is an unsupported time distributed ' 'layer type - model conversion failed') elif class_name == 'bidirectional': layer_info = layer.get_config()['layer'] class_name = layer_info['class_name'].lower() # forward direction layer_info['config']['name'] = layer.forward_layer.name layer_info['config']['go_backwards'] = False if class_name == 'lstm': sublayers.append(keras.layers.LSTM(**layer_info['config'])) elif class_name == 'gru': sublayers.append(keras.layers.GRU(**layer_info['config'])) elif class_name == 'simplernn': sublayers.append(keras.layers.SimpleRNN(**layer_info['config'])) elif class_name == 'cudnnlstm': sublayers.append(keras.layers.CuDNNLSTM(**layer_info['config'])) elif class_name == 'cudnngru': sublayers.append(keras.layers.CuDNNGRU(**layer_info['config'])) else: raise DLPyError(class_name + ' is an unsupported bidirectional ' 'layer type - model conversion failed') # backward direction layer_info['config']['name'] = layer.backward_layer.name layer_info['config']['go_backwards'] = True if class_name == 'lstm': sublayers.append(keras.layers.LSTM(**layer_info['config'])) elif class_name == 'gru': sublayers.append(keras.layers.GRU(**layer_info['config'])) elif class_name == 'simplernn': sublayers.append(keras.layers.SimpleRNN(**layer_info['config'])) elif class_name == 'cudnnlstm': sublayers.append(keras.layers.CuDNNLSTM(**layer_info['config'])) elif class_name == 'cudnngru': sublayers.append(keras.layers.CuDNNGRU(**layer_info['config'])) else: raise DLPyError(class_name + ' is an unsupported bidirectional ' 'layer type - model conversion failed') else: sublayers.append(layer) # Must return sublayers in reverse order if CUDNN is used. # This aligns the Viya layer mapping with the CUDNN layer # mapping. if layer.__class__.__name__.lower() == 'bidirectional': sublayer_info = layer.get_config()['layer'] if sublayer_info['class_name'].lower() in ['cudnnlstm', 'cudnngru']: sublayers.reverse() return class_name, sublayers
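# --- Sketch of the unwrapping behavior, assuming a standalone `keras` installation matching
# the imports used in this module. A built Bidirectional LSTM exposes forward_layer and
# backward_layer, which remove_layer_wrapper splits into two directional sublayers.
import keras

km = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(10, 8)),
    keras.layers.Bidirectional(keras.layers.LSTM(16, return_sequences=True))])
class_name, sublayers = remove_layer_wrapper(km.layers[-1])
print(class_name)                   # 'lstm'
print([l.name for l in sublayers])  # forward and backward sublayer names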
def display_predicted_image_captions(conn, result_tbl, npreds=2, ncol=2, img_path=None, figsize=None): ''' Shows caption prediction for random images Parameters ---------- conn : CAS Specifies the CAS connection object. result_tbl : CASResults object Table containing results from scoring the test data npreds : int, optional Specifies number of caption predictions to show Default : 2 ncol : int, optional Specifies number of columns to display images in Default : 2 img_path : string, optional If used, specifies the path to the image files so the images can be shown along with the captions and objects. If None, only shows captions and objects Default : None figsize : tuple of ints, optional Specifies size of images to be displayed Default : (16,(16 / ncol*nrow)) ''' results = scored_results_to_dict(result_tbl) nimages = min(npreds, len(results)) if img_path is None: for i in range(nimages): r = random.randint(0, len(results) - 1) f_name = list(results.keys())[r] actual_caps = (conn.CASTable( result_tbl.name, where='''_filename_0="{}"'''.format(f_name)).iloc[:, 'caption'] ).values truth = "\n\t".join(actual_caps) objects = (conn.CASTable(result_tbl.name, where='''_filename_0="{}"'''.format( f_name)).iloc[:, 'first_objects']).values objects = "\n\t".join(objects[0].split(',')) rand_row = results[f_name] prediction = rand_row[1] print( "Filename: {}\nObjects: {}\nGround Truth: {}\nPredicted: {}\n". format(f_name, objects, truth, prediction)) else: if nimages > ncol: nrow = nimages // ncol + 1 else: nrow = 1 ncol = nimages if figsize is None: figsize = (16, 16 // ncol * nrow) fig = plt.figure(figsize=figsize) for i in range(nimages): r = random.randint(0, len(results) - 1) f_name = list(results.keys())[r] rand_row = results[f_name] actual_caps = (conn.CASTable( result_tbl.name, where='''_filename_0="{}"'''.format(f_name)).iloc[:, 'caption'] ).values truth = "\n".join(actual_caps) objects = (conn.CASTable(result_tbl.name, where='''_filename_0="{}"'''.format( f_name)).iloc[:, 'first_objects']).values objects = objects[0] caption = rand_row[1] if '/' in img_path: image = '{}/{}'.format(img_path, f_name) elif '\\' in img_path: image = '{}\\{}'.format(img_path, f_name) else: raise DLPyError('img_path given is not a valid path') image = mpimg.imread(image) ax = fig.add_subplot(nrow, ncol, i + 1) ax.set_title('Objects: {}\nGround Truth: {}\nPredicted: {}'.format( objects, truth, caption)) plt.imshow(image) plt.xticks([]), plt.yticks([]) plt.show()
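# --- Usage sketch, assuming `scored` is the result table produced by scoring the captioning
# model; the image directory is a hypothetical client-side path.
display_predicted_image_captions(s, scored, npreds=4, ncol=2,
                                 img_path='/data/images')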
def DenseNet121_ONNX(conn, model_file, n_classes=1000, width=224, height=224, offsets=(255 * 0.406, 255 * 0.456, 255 * 0.485), norm_stds=(255 * 0.225, 255 * 0.224, 255 * 0.229), random_flip=None, random_crop=None, random_mutation=None, include_top=False): """ Generates a deep learning model with the DenseNet121_ONNX architecture. The model architecture and pre-trained weights are generated from the DenseNet121 ONNX model trained on the ImageNet dataset. The model file and the weights file can be downloaded from https://support.sas.com/documentation/prod-p/vdmml/zip/. To learn more about the model and its pre-processing, please go to: https://github.com/onnx/models/tree/master/vision/classification/densenet-121. Parameters ---------- conn : CAS Specifies the CAS connection object. model_file : string Specifies the absolute server-side path of the model table file. The model table file can be downloaded from https://support.sas.com/documentation/prod-p/vdmml/zip/. n_classes : int, optional Specifies the number of classes. Default: 1000 width : int, optional Specifies the width of the input layer. Default: 224 height : int, optional Specifies the height of the input layer. Default: 224 offsets : double or iter-of-doubles, optional Specifies an offset for each channel in the input data. The final input data is set after applying scaling and subtracting the specified offsets. The channel order is BGR. Default: (255*0.406, 255*0.456, 255*0.485) norm_stds : double or iter-of-doubles, optional Specifies a standard deviation for each channel in the input data. The final input data is normalized with specified means and standard deviations. The channel order is BGR. Default: (255*0.225, 255*0.224, 255*0.229) random_flip : string, optional Specifies how to flip the data in the input layer when image data is used. Approximately half of the input data is subject to flipping. Valid Values: 'h', 'hv', 'v', 'none' random_crop : string, optional Specifies how to crop the data in the input layer when image data is used. Images are cropped to the values that are specified in the width and height parameters. Only the images with one or both dimensions that are larger than those sizes are cropped. Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop' random_mutation : string, optional Specifies how to apply data augmentations/mutations to the data in the input layer. Valid Values: 'none', 'random' include_top : bool, optional Specifies whether to include pre-trained weights of the top layers (i.e., the FC layers) Default: False """ parameters = locals() input_parameters = get_layer_options(input_layer_options, parameters) # load model and model weights model = Model.from_sashdat(conn, path=model_file) # check if a user points to a correct model. if model.summary.shape[0] != 307: raise DLPyError( "The model file doesn't point to a valid DenseNet121_ONNX model. "
" "Please check the SASHDAT file.") # extract input layer config model_table_df = conn.CASTable(**model.model_table).to_frame() input_layer_df = model_table_df[model_table_df['_DLLayerID_'] == 0] input_layer = extract_input_layer(input_layer_df) input_layer_config = input_layer.config # update input layer config input_layer_config.update(input_parameters) # update the layer list model.layers[0] = InputLayer(**input_layer_config, name=model.layers[0].name) # warning if model weights doesn't exist if not conn.tableexists(model.model_weights.name).exists: weights_file_path = os.path.join(os.path.dirname(model_file), model.model_name + '_weights.sashdat') print('WARNING: Model weights is not attached ' 'since system cannot find a weights file located at {}'.format( weights_file_path)) if include_top: if n_classes != 1000: raise DLPyError( "If include_top is enabled, n_classes has to be 1000.") else: # since the output layer is non fully connected layer, # we need to modify the convolution right before the output. The number of filter is set to n_classes. conv_layer_df = model_table_df[model_table_df['_DLLayerID_'] == 305] conv_layer = extract_conv_layer(conv_layer_df) conv_layer_config = conv_layer.config # update input layer config conv_layer_config.update({'n_filters': n_classes}) # update the layer list model.layers[-2] = Conv2d(**conv_layer_config, name=model.layers[-2].name, src_layers=model.layers[-3]) # overwrite n_classes in output layer out_layer_df = model_table_df[model_table_df['_DLLayerID_'] == 306] out_layer = extract_output_layer(out_layer_df) out_layer_config = out_layer.config # update input layer config out_layer_config.update({'n': n_classes}) # update the layer list model.layers[-1] = OutputLayer(**out_layer_config, name=model.layers[-1].name, src_layers=model.layers[-2]) # remove top weights model.model_weights.append_where('_LayerID_<305') model._retrieve_('table.partition', table=model.model_weights, casout=dict(replace=True, name=model.model_weights.name)) model.set_weights(model.model_weights.name) # recompile the whole network according to the new layer list model.compile() return model
def bert_summary(conn, table_name, full_table=True, subset_fraction=0.1): ''' Display summary statistics for tokenized data from a given CAS table Parameters ---------- conn : CAS Connection Specifies the CAS connection. table_name : string Specifies the name of the CAS table. full_table: boolean, optional Specifies whether statistics are calculated over full table or subset. Default: True subset_fraction : float, optional Specifies the fraction of the table to use to calculate summary statistics. May be necessary for large tables. Default: 0.1 ''' r = conn.retrieve('table.recordcount', _messagelevel='error', table=table_name) num_obs = r['RecordCount']['N'].values[0] print("NOTE: there are " + str(num_obs) + " observations in the Viya table.") if not full_table: num_obs_calc = int(round(num_obs * subset_fraction)) print("NOTE: calculating summary statistics based on the first " + str(round(subset_fraction * 100.0)) + "% of the table.\n") else: num_obs_calc = num_obs chunk_size = min([num_obs_calc, 10000]) min_tokens = sys.maxsize max_tokens = 0 sum_tokens = 0 sum_sq_tokens = 0 token_var = BertCommon['variable_names']['token_var'] for ii in range(0, num_obs_calc, chunk_size): num_rows = min([chunk_size, num_obs_calc - ii]) tmp = conn.retrieve('table.fetch', _messagelevel='error', table=table_name, maxrows=num_rows, from_=ii, to=ii + num_rows) col_names = list(tmp['Fetch']) if token_var not in list(tmp['Fetch']): raise DLPyError("Missing variable " + token_var + " in table " + table_name + ".") tmp_list = tmp['Fetch'][token_var].to_list() obs_num_tokens = [ len(tmp_list[jj].split(' ')) for jj in range(num_rows) ] min_tokens = min([min_tokens, min(obs_num_tokens)]) max_tokens = max([max_tokens, max(obs_num_tokens)]) sum_tokens += sum(obs_num_tokens) sum_sq_tokens += sum( [x1 * x2 for x1, x2 in zip(obs_num_tokens, obs_num_tokens)]) mean_num_tokens = sum_tokens / num_obs_calc std_num_tokens = np.sqrt(sum_sq_tokens / num_obs_calc - mean_num_tokens * mean_num_tokens) print("NOTE: minimum number of tokens in an observation = " + str(min_tokens)) print("NOTE: maximum number of tokens in an observation = " + str(max_tokens)) print("NOTE: average number of tokens in an observation = " + str(mean_num_tokens)) print( "NOTE: standard deviation of the number of tokens in an observation = " + str(std_num_tokens) + '\n')
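# --- Usage sketch: summarize token counts over the first 10% of a large tokenized table;
# 'bert_train' is a hypothetical table name.
bert_summary(s, 'bert_train', full_table=False, subset_fraction=0.1)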
def load_audio_files(cls, conn, path, casout=None, caslib=None): ''' Load audio files from path Parameters ---------- conn : CAS CAS connection object path : string Path to audio files casout : dict or string or CASTable, optional The output CAS table specification caslib : string, optional The caslib to load audio files from Returns ------- :class:`AudioTable` If audio files are found None If no audio files are found ''' conn.loadactionset('audio', _messagelevel='error') if casout is None: casout = dict(name=random_name('AudioTable', 6)) elif isinstance(casout, CASTable): casout = casout.to_outtable_params() if caslib is None: # get the os of the server server_type = get_cas_host_type(conn).lower() if server_type.startswith("lin") or server_type.startswith("osx"): path_split = path.rsplit("/", 1) else: path_split = path.rsplit("\\", 1) # try accessing the file if len(path_split) == 2: caslib = find_caslib(conn, path_split[0]) if caslib is not None: rt2 = conn.retrieve('audio.loadaudio', _messagelevel='error', casout=casout, caslib=caslib, path=path_split[1]) if rt2.severity > 1: for msg in rt2.messages: print(msg) raise DLPyError( 'cannot load audio files, something is wrong!') cls.running_caslib = path_split[0] return AudioTable(casout['name']) else: caslib = random_name('Caslib', 6) rt2 = conn.retrieve('addcaslib', _messagelevel='error', name=caslib, path=path_split[0], activeonadd=False, subdirectories=True, datasource={'srctype': 'path'}) if rt2.severity < 2: rt3 = conn.retrieve('audio.loadaudio', _messagelevel='error', casout=casout, caslib=caslib, path=path_split[1]) if rt3.severity > 1: for msg in rt3.messages: print(msg) raise DLPyError( 'cannot load audio files, something is wrong!') else: cls.running_caslib = path_split[0] return AudioTable(casout['name']) return None else: rt4 = conn.retrieve('audio.loadaudio', _messagelevel='error', casout=casout, caslib=caslib, path=path) if rt4.severity > 1: for msg in rt4.messages: print(msg) raise DLPyError('cannot load audio files, something is wrong!') cls.running_caslib = find_path_of_caslib(conn, caslib) return AudioTable(casout['name'])
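# --- Usage sketch; the server-side directory is hypothetical. When caslib is omitted, the
# method locates an existing caslib covering the parent directory or creates a temporary one.
audio_tbl = AudioTable.load_audio_files(s, path='/data/audio/train')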
def write_block_information(pymodel, layer_info, keywords, f_out): ''' Write information for a block of layers to an HDF5 file Parameters ---------- pymodel : PyTorch model Specifies the PyTorch model object. layer_info : list of dictionaries Specifies a list of dictionaries - each dictionary entry defines a layer in the model. keywords : list of strings Specifies the keywords to search for in the PyTorch model dictionary. f_out : file handle Specifies the HDF5 file handle. ''' if isinstance(keywords, list): key_list = keywords else: key_list = [keywords] for ii in range(len(layer_info)): for key in layer_info[ii].keys(): lname = layer_info[ii][key]['name'] ltype = layer_info[ii][key]['type'] ldim = layer_info[ii][key]['dim'] if ltype != BertCommon['layer_types']['noparms']: matval, vecval = extract_pytorch_parms(pymodel, lname, ltype, ldim, key_list) # there should be only one match for a given layer if (len(matval) > 1) or ((vecval is not None) and (len(vecval) > 1)): raise DLPyError( 'There were multiple PyTorch layers that matched layer ' + lname) g_out = f_out.create_group(lname) new_weight_names = [] # save weights in format amenable to SAS dset_name = generate_hdf5_dataset_name( lname, BertCommon['weight_index']) new_weight_names.append(dset_name) g_out.create_dataset(dset_name, data=matval[0]) # save bias in format amenable to SAS if vecval is not None: dset_name = generate_hdf5_dataset_name( lname, BertCommon['bias_index']) new_weight_names.append(dset_name) g_out.create_dataset(dset_name, data=vecval[0]) # update weight names g_out.attrs['weight_names'] = new_weight_names
def create_audio_table_speechrecognition( cls, conn, data_path, metadata_path, features_parameters=dict(frame_shift=10, frame_length=25, n_bins=40, n_ceps=40, feature_scaling_method='STANDARDIZATION', n_output_frames=500), casout=None): ''' Creates an Audio table and takes care of all the necessary steps Parameters ---------- conn : CAS A connection object to the current session. data_path : string Path to the file that contains the list of audio files (this is expected to be on the server side). metadata_path : string Location of the metadata file (this is expected to be on the client side). features_parameters : dict, optional Parameters to be used while extracting audio features casout : dict or string or CASTable, optional Resulting output CAS table Returns ------- :class:`AudioTable` A table containing audio features of audio files as well as their labels. The resulting table can be directly used in the deep learning models. Raises ------ DLPyError If anything goes wrong at any point in the process of creating this AudioTable, it complains and prints the appropriate message. ''' au = cls.load_audio_files(conn, data_path) if au is None: raise DLPyError('cannot load audio files') fp = features_parameters features = cls.extract_audio_features( conn, au, frame_shift=fp['frame_shift'], frame_length=fp['frame_length'], n_bins=fp['n_bins'], n_ceps=fp['n_ceps'], feature_scaling_method=fp['feature_scaling_method'], n_output_frames=fp['n_output_frames'], copyvars=['_path_']) if features is None: raise DLPyError('cannot extract audio features') if cls.running_caslib is None: raise DLPyError( 'something went wrong: cannot identify the current caslib') me = cls.load_audio_metadata(conn, metadata_path, audio_path=cls.running_caslib) if me is None: raise DLPyError('cannot load the audio metadata') conn.loadactionset('deeplearn', _messagelevel='error') if casout is None: casout = dict(name=random_name('AudioTable', 6)) elif isinstance(casout, CASTable): casout = casout.to_outtable_params() if 'name' not in casout: casout['name'] = random_name('AudioTable', 6) rt = conn.retrieve('dlJoin', _messagelevel='error', casout=casout, annotation=me, table=features, id='_path_') if rt.severity > 1: for msg in rt.messages: print(msg) raise DLPyError('cannot create the final audio table!') return AudioTable(casout['name'])
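# --- Usage sketch: data_path lists the audio files on the server, while metadata_path is a
# client-side transcript file; both paths are placeholders.
audio = AudioTable.create_audio_table_speechrecognition(
    s, data_path='/data/audio/file_list.txt',
    metadata_path='/local/metadata.txt')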
def keras_to_sas(model, rnn_support, model_name=None, offsets=None, std=None, scale=1.0, max_num_frames=-1, verbose=False): output_code = '' layer_activation = {} src_layer = {} layer_dropout = {} if model_name is None: model_name = model.name model_type = 'CNN' n_lambda_layer = 0 for layer in model.layers: class_name, sublayers = remove_layer_wrapper(layer) for tlayer in sublayers: if (class_name in computation_layer_classes) or (class_name == 'zeropadding2d'): comp_layer_name = find_previous_computation_layer( model, layer.name, computation_layer_classes) source_str = make_source_str(comp_layer_name) src_layer.update({tlayer.name: source_str}) elif class_name == 'activation': tmp_name = find_previous_computation_layer( model, layer.name, computation_layer_classes) tmp_act = extract_activation(layer) layer_activation.update({tmp_name[0]: tmp_act}) elif class_name == 'dropout': tmp = find_next_computation_layer(model, layer, dropout_layer_classes) dconfig = layer.get_config() layer_dropout.update({tmp: dconfig['rate']}) # check for RNN model if class_name in [ 'simplernn', 'lstm', 'gru', 'cudnnlstm', 'cudnngru' ]: if rnn_support: model_type = 'RNN' else: raise DLPyError( 'RNN model detected: your Viya deployment does not support ' 'importing an RNN model.') # check for Lambda layers if layer.__class__.__name__.lower() == 'lambda': n_lambda_layer = n_lambda_layer + 1 # if first layer is not an input layer, generate the correct # input layer code for a SAS deep learning model layer = model.layers[0] if layer.__class__.__name__.lower() != 'inputlayer': sas_code = keras_input_layer(layer, model_name, False, offsets, std, scale, model_type, max_num_frames) # write SAS code for input layer if sas_code: output_code = output_code + sas_code + '\n\n' else: raise KerasParseError('Unable to generate an input layer') # only one Lambda layer supported, and it must be the last model layer # assumption: CTC loss must be specified for an RNN model using a # Lambda layer ctc_loss = False if n_lambda_layer > 0: layer = model.layers[-1] if (n_lambda_layer == 1) and (layer.__class__.__name__.lower() == 'lambda') and (model_type == 'RNN'): ctc_loss = True if verbose: print( 'WARNING - detected a Lambda layer terminating the Keras model. This is assumed to be ' 'the CTC loss function definition. If that is incorrect, please revise your Keras model.' ) else: raise KerasParseError( 'Detected one or more Lambda layers. 
Only 1 Lambda ' 'layer is supported for RNN models, and it must be ' 'the last layer.') # extract layers and apply activation functions as needed zero_pad = None for layer in model.layers: class_name, sublayers = remove_layer_wrapper(layer) for tlayer in sublayers: sas_code = None # determine activation function if class_name in ['conv2d', 'batchnormalization', 'add', 'dense']: if layer.name in layer_activation.keys(): act_func = layer_activation[layer.name] else: act_func = None else: act_func = None # average/max pooling/globalaveragepooling if class_name in [ 'averagepooling2d', 'maxpooling2d', 'globalaveragepooling2d' ]: sas_code = keras_pooling_layer(tlayer, model_name, class_name, src_layer, layer_dropout, zero_pad) zero_pad = None # 2D convolution elif class_name == 'conv2d': sas_code = keras_convolution_layer(tlayer, model_name, act_func, src_layer, layer_dropout, zero_pad) zero_pad = None # batch normalization elif class_name == 'batchnormalization': sas_code = keras_batchnormalization_layer( tlayer, model_name, act_func, src_layer) # input layer elif class_name == 'inputlayer': sas_code = keras_input_layer(tlayer, model_name, True, offsets, std, scale, model_type, max_num_frames) # add elif class_name == 'add': sas_code = keras_residual_layer(tlayer, model_name, act_func, src_layer) elif class_name in [ 'activation', 'flatten', 'dropout', 'zeropadding2d', 'lambda' ]: pass # fully connected elif class_name == 'dense': sas_code = keras_full_connect_layer(tlayer, model_name, act_func, src_layer, layer_dropout, ctc_loss) # concatenate elif class_name == 'concatenate': sas_code = keras_concatenate_layer(tlayer, model_name, act_func, src_layer) # recurrent elif class_name in [ 'simplernn', 'lstm', 'gru', 'cudnnlstm', 'cudnngru' ]: sas_code = keras_recurrent_layer(tlayer, model_name, act_func, src_layer) else: raise KerasParseError(class_name + ' is an unsupported layer ' 'type - model conversion failed') # write SAS code associated with Keras layer if sas_code: output_code = output_code + sas_code + '\n\n' # zero-padding elif (class_name == 'zeropadding2d'): zero_pad = keras_zeropad2d_layer(tlayer, src_layer) elif (class_name not in ['activation', 'flatten', 'dropout', 'lambda']): if verbose: print('WARNING: unable to generate SAS definition ' 'for layer ' + tlayer.name) return output_code
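# --- Sketch of converting a small Keras CNN to SAS deep learning code, assuming the same
# standalone `keras` package as above. rnn_support reflects whether the target Viya
# deployment can import RNN layers; the model and its name are illustrative.
import keras

km = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(32, 32, 3)),
    keras.layers.Conv2D(8, (3, 3), padding='same'),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(10, activation='softmax')])
sas_code = keras_to_sas(km, rnn_support=False, model_name='small_cnn',
                        scale=1.0 / 255)
print(sas_code)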
def create_embeddings_from_object_detection(conn, image_table, detection_model, word_embeddings_file, n_threads=None, gpu=None, max_objects=5, word_delimiter='\t'): ''' Builds CASTable with objects detected in images as numeric data Parameters ---------- conn : CAS Specifies the CAS connection object. image_table: imageTable Specifies name of CASTable that contains images to be used for training detection_model : CASTable or string Specifies CASTable containing model parameters for the object detection model word_embeddings_file : string Specifies full path to file containing pre-trained word vectors to be used for text generation. This file should be accessible from the client. n_threads : int, optional Specifies the number of threads to use when scoring the table. All cores available used when nothing is set. Default : None gpu : Gpu, optional When specified, specifies which gpu to use when scoring the table. GPU=1 uses all available GPU devices and default parameters. Default : None max_objects : int, optional Specifies the maximum number of detected objects to use, capped at five Default : 5 word_delimiter : string, optional Specifies delimiter used in word_embeddings file Default : '\t' Returns ------- :class:`CASTable` ''' if not os.path.exists(word_embeddings_file): raise DLPyError('word_embeddings_file does not exist') if not isinstance(image_table, ImageTable): raise DLPyError('image_table must be an ImageTable object') conn.loadactionset('deepLearn') conn.loadactionset('textparse') width = detection_model.summary['Output Size'][0][1] height = detection_model.summary['Output Size'][0][0] image_table.resize(width=width, height=height) scoring_error = False try: scored = detection_model.predict(data=image_table, n_threads=n_threads, gpu=gpu) except Exception: scoring_error = True if scoring_error or scored is None: raise DLPyError('Something went wrong while scoring the data.') object_table = detection_model.valid_res_tbl # combine first n objects into single column first_objects = object_table.copy() first_objects['first_objects'] = first_objects['_Object0_'] + "," if max_objects > 5: max_objects = 5 for i in range(1, max_objects): objects = first_objects['_Object{}_'.format(i)] + "," first_objects['first_objects'] = first_objects['first_objects'].add( objects) objects_numeric = numeric_parse_text(conn, first_objects, word_embeddings_file, word_delimiter=word_delimiter) # merge objects table and numeric table df1 = objects_numeric.to_frame() df2 = first_objects.to_frame() objects = pd.merge(df1, df2, left_on='_id_', right_on='_id_', how='left') objects = conn.upload_frame(objects, casout=dict(name='objects', replace=True)) # remove unnecessary columns useful_vars = list(objects_numeric.columns) useful_vars.append('_filename_0') useful_vars.append('first_objects') bad_columns = set(list(objects.columns)) - set(useful_vars) final_objects = objects.drop(bad_columns, axis=1) return final_objects
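# --- Usage sketch; assumes `image_tbl` is an ImageTable and `detection_model` a trained
# detection Model, and uses a hypothetical client-side word-vectors file.
obj_tbl = create_embeddings_from_object_detection(
    s, image_tbl, detection_model, '/local/word_vectors.txt',
    max_objects=5, word_delimiter='\t')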
def Faster_RCNN(conn, model_table='Faster_RCNN', n_channels=3, width=1000, height=496, scale=1, norm_stds=None, offsets=(102.9801, 115.9465, 122.7717), random_mutation=None, n_classes=20, anchor_num_to_sample=256, anchor_ratio=[0.5, 1, 2], anchor_scale=[8, 16, 32], base_anchor_size=16, coord_type='coco', max_label_per_image=200, proposed_roi_num_train=2000, proposed_roi_num_score=300, roi_train_sample_num=128, roi_pooling_height=7, roi_pooling_width=7, nms_iou_threshold=0.3, detection_threshold=0.5, max_object_num=50, number_of_neurons_in_fc=4096, backbone='vgg16', random_flip=None, random_crop=None): ''' Generates a deep learning model with the Faster RCNN architecture. Parameters ---------- conn : CAS Specifies the CAS connection object. model_table : string, optional Specifies the name of CAS table to store the model. n_channels : int, optional Specifies the number of the channels (i.e., depth) of the input layer. Default: 3 width : int, optional Specifies the width of the input layer. Default: 1000 height : int, optional Specifies the height of the input layer. Default: 496 scale : double, optional Specifies a scaling factor to be applied to each pixel intensity value. Default: 1 norm_stds : double or iter-of-doubles, optional Specifies a standard deviation for each channel in the input data. The final input data is normalized with specified means and standard deviations. offsets : double or iter-of-doubles, optional Specifies an offset for each channel in the input data. The final input data is set after applying scaling and subtracting the specified offsets. random_mutation : string, optional Specifies how to apply data augmentations/mutations to the data in the input layer. Valid Values: 'none', 'random' n_classes : int, optional Specifies the number of classes. If None is assigned, the model will automatically detect the number of classes based on the training set. Default: 20 anchor_num_to_sample : int, optional Specifies the number of anchors to sample for training the region proposal network Default: 256 anchor_ratio : iter-of-float Specifies the anchor height and width ratios (h/w) used. anchor_scale : iter-of-float Specifies the anchor scales used based on base_anchor_size base_anchor_size : int, optional Specifies the basic anchor size in width and height (in pixels) in the original input image dimension Default: 16 coord_type : string, optional Specifies the coordinates format type in the input label and detection result. Valid Values: RECT, COCO, YOLO Default: COCO proposed_roi_num_score: int, optional Specifies the number of ROI (Region of Interest) to propose in the scoring phase Default: 300 proposed_roi_num_train: int, optional Specifies the number of ROI (Region of Interest) to propose used for RPN training, and also the pool to sample from for FastRCNN Training in the training phase Default: 2000 roi_train_sample_num: int, optional Specifies the number of ROIs(Regions of Interests) to sample after NMS(Non-maximum Suppression) is performed in the training phase. Default: 128 roi_pooling_height : int, optional Specifies the output height of the region pooling layer. Default: 7 roi_pooling_width : int, optional Specifies the output width of the region pooling layer. Default: 7 max_label_per_image : int, optional Specifies the maximum number of labels per image in the training.
Default: 200 nms_iou_threshold: float, optional Specifies the IOU threshold of non-maximum suppression in object detection Default: 0.3 detection_threshold : float, optional Specifies the threshold for object detection. Default: 0.5 max_object_num: int, optional Specifies the maximum number of objects to detect Default: 50 number_of_neurons_in_fc: int, or list of int, optional Specifies the number of neurons in the last two fully connected layers. If one int is set, then both of the layers will have the same value. If a list is set, then the layers get different numbers of neurons. Default: 4096 backbone: string, optional Specifies the architecture to be used as the feature extractor. Valid Values: vgg16, resnet50, resnet18, resnet34, mobilenetv1, mobilenetv2 Default: vgg16 random_flip : string, optional Specifies how to flip the data in the input layer when image data is used. Approximately half of the input data is subject to flipping. Valid Values: 'h', 'hv', 'v', 'none' random_crop : string, optional Specifies how to crop the data in the input layer when image data is used. Images are cropped to the values that are specified in the width and height parameters. Only the images with one or both dimensions that are larger than those sizes are cropped. Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop' Returns ------- :class:`Model` References ---------- https://arxiv.org/abs/1506.01497 ''' # the number of anchors equals the product of the lengths of anchor_ratio and anchor_scale num_anchors = len(anchor_ratio) * len(anchor_scale) parameters = locals() # get parameters of input, rpn, fast_rcnn layer input_parameters = get_layer_options(input_layer_options, parameters) rpn_parameters = get_layer_options(rpn_layer_options, parameters) fast_rcnn_parameters = get_layer_options(fast_rcnn_options, parameters) inp = Input(**input_parameters, name='data') if backbone.lower() == 'vgg16': # backbone is VGG16 model conv1_1 = Conv2d(n_filters=64, width=3, height=3, stride=1, name='conv1_1')(inp) conv1_2 = Conv2d(n_filters=64, width=3, height=3, stride=1, name='conv1_2')(conv1_1) pool1 = Pooling(width=2, height=2, stride=2, pool='max', name='pool1')(conv1_2) conv2_1 = Conv2d(n_filters=128, width=3, height=3, stride=1, name='conv2_1')(pool1) conv2_2 = Conv2d(n_filters=128, width=3, height=3, stride=1, name='conv2_2')(conv2_1) pool2 = Pooling(width=2, height=2, stride=2, pool='max')(conv2_2) conv3_1 = Conv2d(n_filters=256, width=3, height=3, stride=1, name='conv3_1')(pool2) conv3_2 = Conv2d(n_filters=256, width=3, height=3, stride=1, name='conv3_2')(conv3_1) conv3_3 = Conv2d(n_filters=256, width=3, height=3, stride=1, name='conv3_3')(conv3_2) pool3 = Pooling(width=2, height=2, stride=2, pool='max')(conv3_3) conv4_1 = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv4_1')(pool3) conv4_2 = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv4_2')(conv4_1) conv4_3 = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv4_3')(conv4_2) pool4 = Pooling(width=2, height=2, stride=2, pool='max')(conv4_3) conv5_1 = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv5_1')(pool4) conv5_2 = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv5_2')(conv5_1) # feature of Conv5_3 is used to generate region proposals last_layer_in_backbone = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv5_3')(conv5_2) # two convolutions built on top of conv5_3 reduce the feature map depth to 6*num_anchors rpn_conv = Conv2d(width=3,
n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone) rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors), name='rpn_score')(rpn_conv) # propose anchors, NMS, select anchors to train RPN, produce ROIs rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score) # given ROIs, crop on conv5_3 and resize the feature to the same size roipool1 = ROIPooling( output_height=roi_pooling_height, output_width=roi_pooling_width, spatial_scale=last_layer_in_backbone.shape[0] / width, name='roi_pooling')([last_layer_in_backbone, rp1]) elif backbone.lower() == 'resnet50': from .resnet import ResNet50_SAS backbone = ResNet50_SAS(conn, width=width, height=height) backbone.layers[-2].src_layers backbone_with_last = backbone.to_functional_model( stop_layers=backbone.layers[-2]) last_layer_in_backbone = backbone_with_last(inp) # two convolutions build on top of f_ex and reduce feature map depth to 6*number_anchors rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone) rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors), name='rpn_score')(rpn_conv) # propose anchors, NMS, select anchors to train RPN, produce ROIs rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score) roipool1 = ROIPooling( output_height=roi_pooling_height, output_width=roi_pooling_width, spatial_scale=last_layer_in_backbone[0].shape.output_size[0] / height, name='roi_pooling')([last_layer_in_backbone[0], rp1]) elif backbone.lower() == 'resnet34': from .resnet import ResNet34_SAS backbone = ResNet34_SAS(conn, width=width, height=height) backbone.layers[-2].src_layers backbone_with_last = backbone.to_functional_model( stop_layers=backbone.layers[-2]) last_layer_in_backbone = backbone_with_last(inp) # two convolutions build on top of f_ex and reduce feature map depth to 6*number_anchors rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone) rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors), name='rpn_score')(rpn_conv) # propose anchors, NMS, select anchors to train RPN, produce ROIs rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score) roipool1 = ROIPooling( output_height=roi_pooling_height, output_width=roi_pooling_width, spatial_scale=last_layer_in_backbone[0].shape.output_size[0] / height, name='roi_pooling')([last_layer_in_backbone[0], rp1]) elif backbone.lower() == 'resnet18': from .resnet import ResNet18_SAS backbone = ResNet18_SAS(conn, width=width, height=height) backbone.layers[-2].src_layers backbone_with_last = backbone.to_functional_model( stop_layers=backbone.layers[-2]) last_layer_in_backbone = backbone_with_last(inp) # two convolutions build on top of f_ex and reduce feature map depth to 6*number_anchors rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone) rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors), name='rpn_score')(rpn_conv) # propose anchors, NMS, select anchors to train RPN, produce ROIs rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score) roipool1 = ROIPooling( output_height=roi_pooling_height, output_width=roi_pooling_width, spatial_scale=last_layer_in_backbone[0].shape.output_size[0] / height, name='roi_pooling')([last_layer_in_backbone[0], rp1]) elif backbone.lower() == 'mobilenetv1': from .mobilenet import MobileNetV1 backbone = MobileNetV1(conn, width=width, height=height) backbone.layers[-2].src_layers backbone_with_last = backbone.to_functional_model( 
stop_layers=backbone.layers[-2]) last_layer_in_backbone = backbone_with_last(inp) # two convolutions build on top of f_ex and reduce feature map depth to 6*number_anchors rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone) rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors), name='rpn_score')(rpn_conv) # propose anchors, NMS, select anchors to train RPN, produce ROIs rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score) roipool1 = ROIPooling( output_height=roi_pooling_height, output_width=roi_pooling_width, spatial_scale=last_layer_in_backbone[0].shape.output_size[0] / height, name='roi_pooling')([last_layer_in_backbone[0], rp1]) elif backbone.lower() == 'mobilenetv2': from .mobilenet import MobileNetV2 backbone = MobileNetV2(conn, width=width, height=height) backbone.layers[-2].src_layers backbone_with_last = backbone.to_functional_model( stop_layers=backbone.layers[-2]) last_layer_in_backbone = backbone_with_last(inp) # two convolutions build on top of f_ex and reduce feature map depth to 6*number_anchors rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone) rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors), name='rpn_score')(rpn_conv) # propose anchors, NMS, select anchors to train RPN, produce ROIs rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score) roipool1 = ROIPooling( output_height=roi_pooling_height, output_width=roi_pooling_width, spatial_scale=last_layer_in_backbone[0].shape.output_size[0] / height, name='roi_pooling')([last_layer_in_backbone[0], rp1]) else: raise DLPyError('We are not supporting this backbone yet.') # fully connect layer to extract the feature of ROIs if number_of_neurons_in_fc is None: fc6 = Dense(n=4096, act='relu', name='fc6')(roipool1) fc7 = Dense(n=4096, act='relu', name='fc7')(fc6) else: if isinstance(number_of_neurons_in_fc, list): if len(number_of_neurons_in_fc) > 1: fc6 = Dense(n=number_of_neurons_in_fc[0], act='relu', name='fc6')(roipool1) fc7 = Dense(n=number_of_neurons_in_fc[1], act='relu', name='fc7')(fc6) else: fc6 = Dense(n=number_of_neurons_in_fc[0], act='relu', name='fc6')(roipool1) fc7 = Dense(n=number_of_neurons_in_fc[0], act='relu', name='fc7')(fc6) else: fc6 = Dense(n=number_of_neurons_in_fc, act='relu', name='fc6')(roipool1) fc7 = Dense(n=number_of_neurons_in_fc, act='relu', name='fc7')(fc6) # classification tensor cls1 = Dense(n=n_classes + 1, act='identity', name='cls_score')(fc7) # regression tensor(second stage bounding box regression) reg1 = Dense(n=(n_classes + 1) * 4, act='identity', name='bbox_pred')(fc7) # task layer receive cls1, reg1 and rp1(ground truth). Train the second stage. fr1 = FastRCNN(**fast_rcnn_parameters, class_number=n_classes, name='fastrcnn')([cls1, reg1, rp1]) faster_rcnn = Model(conn, inp, fr1, model_table=model_table) faster_rcnn.compile() return faster_rcnn
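# --- Usage sketch: build a VGG16-backbone Faster R-CNN for 20 classes; the anchor settings
# shown are just the documented defaults spelled out for clarity.
frcnn = Faster_RCNN(s, model_table='Faster_RCNN', n_classes=20,
                    anchor_ratio=[0.5, 1, 2], anchor_scale=[8, 16, 32],
                    backbone='vgg16')
frcnn.print_summary()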
def segment_audio(path, local_path, data_path_after_caslib, segment_len, framerate, sampwidth): """ Segment the audio into pieces shorter than segment_len. Parameters ---------- path : string Specifies path of the audio file. local_path : string Specifies the location where temporary segmented audio files are stored (server side). data_path_after_caslib : string Specifies the location where temporary segmented audio files are stored (client side, relative to caslib). Note that local_path and data_path_after_caslib actually point to the same position. segment_len : float Specifies the maximum length of one segment in seconds. framerate : int Specifies the desired framerate. sampwidth : int Specifies the desired sampwidth. Returns ------- listing_path_after_caslib : string Path of the file listing the audio segments on the server side, relative to caslib. listing_path_local : string Path of the file listing the audio segments on the client side. segment_path_after_caslib_list : list of string A list of paths of the audio segments on the server side, relative to caslib. segment_path_local_list : list of string A list of paths of the audio segments on the client side. """ if os.path.isfile(path): wave_reader, wave_params = read_audio(path) else: raise DLPyError("Cannot find the audio file.") if segment_len <= 0: raise DLPyError( "Incorrect \"segment_len\" value: the maximum segment length must be positive." ) if segment_len > 35: raise DLPyError( "Incorrect \"segment_len\" value: the maximum segment length cannot be longer than 35 seconds." ) is_framerate_desired = check_framerate(wave_params, framerate) is_sampwidth_desired = check_sampwidth(wave_params, sampwidth) is_stereo = check_stereo(wave_params) # generate the listing file name audio_name = os.path.basename(path) audio_name = os.path.splitext(audio_name)[0] listing_name_no_ext = None listing_name = None while listing_name is None: listing_name_no_ext = random_name(audio_name, 6) listing_name = listing_name_no_ext + ".listing" listing_path_after_caslib = data_path_after_caslib + listing_name listing_path_local = os.path.join(local_path, listing_name) if os.path.exists(listing_path_local): listing_name = None # segmentation segment_nframes_list = calculate_segment_nframes(path, segment_len) print("Note:", str(len(segment_nframes_list)), "temporary audio files are created.") segment_path_after_caslib_list = [] segment_path_local_list = [] with open(listing_path_local, "w") as listing_file: wave_reader.rewind() for i in range(len(segment_nframes_list)): segment_name = listing_name_no_ext + "_" + str(i) + ".wav" segment_path_after_caslib = data_path_after_caslib + segment_name segment_path_local = os.path.join(local_path, segment_name) with wave.open(segment_path_local, "wb") as wave_writer: segment_path_after_caslib_list.append( segment_path_after_caslib) segment_path_local_list.append(segment_path_local) wave_writer.setnchannels(1) wave_writer.setframerate(framerate) wave_writer.setsampwidth(sampwidth) wave_writer.setcomptype(wave_params.comptype, wave_params.compname) fragment = wave_reader.readframes(segment_nframes_list[i]) if is_stereo: fragment = convert_stereo_to_mono(fragment, wave_params.sampwidth) if not is_framerate_desired: fragment = convert_framerate(fragment, wave_params.sampwidth, 1, wave_params.framerate, framerate) if not is_sampwidth_desired: fragment = convert_sampwidth(fragment, wave_params.sampwidth, sampwidth) wave_writer.writeframes(fragment) wave_reader.close() for segment_path_after_caslib in
segment_path_after_caslib_list: listing_file.write(segment_path_after_caslib + "\n") # listing_path_after_caslib: to load audio # listing_path_local: to remove listing file # segment_path_after_caslib_list: to concatenate results (add caslib path) # segment_path_local_list: to remove segmented files return listing_path_after_caslib, listing_path_local, segment_path_after_caslib_list, segment_path_local_list
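# --- Usage sketch: split one long recording into at-most-30-second mono segments at 16 kHz
# with 2-byte samples. All paths are placeholders; note that local_path and
# data_path_after_caslib must refer to the same physical directory, seen from the client and
# from the caslib root respectively.
listing_caslib, listing_local, seg_caslib_list, seg_local_list = segment_audio(
    '/local/audio/long_recording.wav', '/shared/audio_tmp/', 'audio_tmp/',
    segment_len=30, framerate=16000, sampwidth=2)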