Example #1
def ImageCaptioning(conn,
                    model_name='image_captioning',
                    num_blocks=3,
                    neurons=50,
                    rnn_type='LSTM',
                    max_output_len=15):
    '''
    Builds an RNN to be used for image captioning

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_name : string, optional
        Specifies the output name of the model
        Default: 'image_captioning'
    num_blocks : int, optional
        Specifies the number of recurrent layers with 'samelength' output
        Default : 3
    neurons : int, optional
        Specifies the number of neurons in each recurrent layer
        Default : 50
    rnn_type : string, optional
        Specifies the type of the RNN layers. Possible values: RNN, LSTM, GRU
        Default: LSTM
    max_output_len : int, optional
        Specifies the maximum number of tokens to generate in the final layer
        (i.e., the maximum caption length)
        Default : 15
    Returns
    -------
    :class:`CASTable`

    '''
    if num_blocks < 1:
        raise DLPyError('num_blocks must be at least 1')

    model = Sequential(conn, model_table=model_name)

    model.add(InputLayer(name='input'))
    print('InputLayer added named "input"')
    for i in range(num_blocks):
        model.add(
            Recurrent(n=neurons,
                      init='msra',
                      rnn_type=rnn_type,
                      output_type='samelength'))

    model.add(
        Recurrent(n=neurons,
                  init='msra',
                  rnn_type=rnn_type,
                  output_type='encoding'))

    model.add(
        Recurrent(n=neurons,
                  init='msra',
                  rnn_type=rnn_type,
                  output_type='arbitrarylength',
                  max_output_length=max_output_len))

    model.add(OutputLayer(name='output'))
    print('OutputLayer added named "output"')
    return model
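
A minimal usage sketch follows; the CAS host and port are placeholders, and it assumes swat is installed alongside the DLPy layer classes imported by this example.

import swat

conn = swat.CAS('cas-host.example.com', 5570)   # hypothetical CAS server
conn.loadactionset('deepLearn')

# Build a captioning RNN: three 'samelength' blocks of 50 LSTM neurons,
# an encoding layer, and an 'arbitrarylength' decoding layer
caption_model = ImageCaptioning(conn,
                                model_name='caption_rnn',
                                num_blocks=3,
                                neurons=50,
                                rnn_type='LSTM',
                                max_output_len=15)
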
Example #2
def reshape_caption_columns(
    conn,
    table,
    caption_col_name='Var',
    num_captions=5,
):
    '''
    Reshapes the table so that there is only one caption per row

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    table : CASTable or string
        Specifies the CASTable (or its name) containing the merged captions, features, and objects
    caption_col_name : string, optional
        Specifies the base name of the columns that contain captions
        Default : 'Var'
    num_captions : int, optional
        Specifies the number of captions per image
        Default : 5

    Returns
    -------
    :class:`CASTable`

    '''
    # convert table to one caption per line
    columns = list(table.columns)
    if '{}0'.format(caption_col_name) not in columns:
        raise DLPyError(
            'caption_col_name {} does not exist in the table'.format(
                caption_col_name))
    capt_idx_start = columns.index('{}0'.format(caption_col_name))

    # initialize new_tbl dictionary with columns
    new_tbl = dict()
    for c in columns:
        if caption_col_name not in c:
            new_tbl[c] = []
    new_tbl['caption'] = []

    # make list of rows containing only one caption each
    new_tbl_list = list()
    rows = (table.values).tolist()
    try:
        for row in rows:
            for i in range(num_captions):
                new_row = []
                for j in range(len(row)):
                    if j not in range(capt_idx_start,
                                      capt_idx_start + num_captions):
                        new_row.append(row[j])
                new_row.append(row[capt_idx_start + i])
                new_tbl_list.append(new_row)
    except IndexError:
        raise DLPyError("Wrong number of captions specified")

    # add values to dictionary
    for row in new_tbl_list:
        cnt = 0
        for key in new_tbl.keys():
            new_tbl[key].append(row[cnt])
            cnt += 1

    # create CASTable from dictionary
    rnn_input = CASTable.from_dict(conn, new_tbl)

    return rnn_input
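
To see what the reshape produces, here is a small, pure-pandas illustration of the same wide-to-long idea; the real function operates on a CASTable, and the column values and file names below are made up.

import pandas as pd

# Hypothetical wide table: one row per image, five caption columns Var0..Var4
wide = pd.DataFrame({
    '_filename_0': ['img1.jpg', 'img2.jpg'],
    'Var0': ['a dog runs', 'a red car'],
    'Var1': ['dog running fast', 'car parked outside'],
    'Var2': ['a brown dog', 'a small red car'],
    'Var3': ['dog in the park', 'car on the road'],
    'Var4': ['a happy dog', 'a shiny car'],
})

# One caption per row, mirroring reshape_caption_columns with num_captions=5
long_tbl = (wide.melt(id_vars=['_filename_0'],
                      value_vars=['Var0', 'Var1', 'Var2', 'Var3', 'Var4'],
                      value_name='caption')
                .drop(columns='variable'))
print(long_tbl)
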
Example #3
def create_captioning_table(conn,
                            image_table,
                            features_model,
                            captions_file,
                            obj_detect_model=None,
                            word_embeddings_file=None,
                            num_captions=5,
                            dense_layer='fc7',
                            captions_delimiter='\t',
                            caption_col_name='Var',
                            embeddings_delimiter='\t',
                            n_threads=None,
                            gpu=None):
    '''
    Builds a CASTable with all necessary information to train an image captioning model

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    image_table : ImageTable
        Specifies the ImageTable (a CASTable) that contains the images to be used for training
    features_model : dlpy Model object
        Specifies CNN model to use for extracting features
    captions_file : string
        Specifies absolute path to file containing image filenames and captions
        Client should have access to this file.
    obj_detect_model : CASTable or string, optional
        Specifies CASTable containing model parameters for the object detection model
        Default : None
    word_embeddings_file : string, optional
        Specifies full path to file containing pre-trained word vectors to be used for text generation.
        This file should be accessible from the client.
        Required if obj_detect_model is not None
        Default : None
    num_captions : int, optional
        Specifies number of captions for each image in the captions file
        Default : 5
    dense_layer: string, optional
        Specifies layer from CNN model to extract features from
        Default : 'fc7'
    captions_delimiter : string, optional
        Specifies delimiter between filenames and captions in the image captions text file
        Default : '\t'
    caption_col_name : string, optional
        Specifies base name for column names for the columns containing captions
        Default : 'Var'
    embeddings_delimiter : string, optional
        Specifies delimiter used in word embeddings file
        Default : '\t'
    n_threads : int, optional
        Specifies the number of threads to use when scoring the table. All available
        cores are used when nothing is set.
        Default : None
    gpu : Gpu, optional
        Specifies which GPU to use when scoring the table. GPU=1 uses all available
        GPU devices and default parameters.
        Default : None

    Returns
    -------
    :class:`CASTable`

    '''
    # get all necessary tables
    image_features = get_image_features(conn, features_model, image_table,
                                        dense_layer)
    captions_table = create_captions_table(conn,
                                           captions_file,
                                           delimiter=captions_delimiter,
                                           caption_col_name=caption_col_name)

    # merge features and captions tables
    df1 = captions_table.to_frame()
    df2 = image_features.to_frame()
    captions_features = pd.merge(df1,
                                 df2,
                                 left_on='_filename_0',
                                 right_on='_filename_0',
                                 how='left')
    result = conn.upload_frame(captions_features,
                               casout=dict(name='captions_features',
                                           replace=True))
    # conn.dljoin(table=captions_table,annotatedTable=image_features,
    #             id='_filename_0',casOut=dict(name='captions_features',replace=True))
    # result = conn.CASTable('captions_features')

    if obj_detect_model is not None:
        if word_embeddings_file is None:
            raise DLPyError(
                "word_embeddings_file required for object detection")
        else:
            # resize images for object detection scoring
            detected_objects = create_embeddings_from_object_detection(
                conn,
                image_table,
                obj_detect_model,
                word_embeddings_file,
                word_delimiter=embeddings_delimiter,
                n_threads=n_threads,
                gpu=gpu)
            # conn.dljoin(table=dict(name='captions_features'),annotatedTable=detected_objects,
            #             id='_filename_0',casOut=dict(name='obj_capt_feats',replace=True))
            df1 = detected_objects.to_frame()
            df2 = result.to_frame()
            obj_capt_feat = pd.merge(df1,
                                     df2,
                                     left_on='_filename_0',
                                     right_on='_filename_0',
                                     how='left')
            result = conn.upload_frame(obj_capt_feat,
                                       casout=dict(name='full_table',
                                                   replace=True))

    final_table = reshape_caption_columns(conn,
                                          result,
                                          caption_col_name=caption_col_name,
                                          num_captions=num_captions)
    drop_columns = set(final_table.columns) - set(
        captions_table.columns) - set(image_features.columns)
    if obj_detect_model:
        drop_columns = set(drop_columns) - set(detected_objects.columns)
    drop_columns.remove('caption')
    final_table.drop(drop_columns, axis=1, inplace=True)

    return final_table
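
A hedged end-to-end sketch of calling this function; the server address, file paths, and the choice of VGG16 as the feature extractor are illustrative assumptions, not requirements of the API.

import swat
from dlpy.images import ImageTable          # import paths assumed for this DLPy version
from dlpy.applications import VGG16

conn = swat.CAS('cas-host.example.com', 5570)
conn.loadactionset('deepLearn')

img_tbl = ImageTable.load_files(conn, path='/data/coco/images')   # hypothetical path
cnn = VGG16(conn, model_table='vgg16_features')                   # feature-extraction CNN

captioning_tbl = create_captioning_table(conn,
                                         image_table=img_tbl,
                                         features_model=cnn,
                                         captions_file='/data/coco/captions.tsv',
                                         num_captions=5,
                                         dense_layer='fc7')
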
Example #4
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

''' Convert Keras models to SAS models '''

import os

from keras import backend as K
from distutils.version import StrictVersion
import keras
from dlpy.utils import DLPyError
if StrictVersion(keras.__version__) < '2.1.3' or StrictVersion(keras.__version__) > '2.1.6':
    raise DLPyError('This keras version (' + keras.__version__ + ') is not supported, '
                    'please use a version >= 2.1.3 and <= 2.1.6')

from .write_keras_model_parm import write_keras_hdf5
from .write_sas_code import (write_input_layer, write_convolution_layer,
                             write_batch_norm_layer, write_pooling_layer,
                             write_residual_layer, write_full_connect_layer,
                             write_concatenate_layer, write_main_entry)

computation_layer_classes = ['averagepooling2d', 'maxpooling2d', 'conv2d',
                             'dense', 'batchnormalization', 'add', 'concatenate',
                             'globalaveragepooling2d']
dropout_layer_classes = ['averagepooling2d', 'maxpooling2d', 'conv2d', 'dense']


class KerasParseError(ValueError):
    '''
    Raised when a Keras model definition cannot be parsed
    '''
Example #5
def convert_one_audio_file(local_audio_file, converted_local_audio_file):
    '''
    Convert a local audio file into a WAV file with a single channel, 16-bit samples, and a 16 kHz sampling rate.

    Parameters
    ----------
    local_audio_file : string
        Local path to the audio file to be converted.

    converted_local_audio_file : string
        Local path where the converted audio file is stored.

    Returns
    -------
    None

    Raises
    ------
    DLPyError
        If anything goes wrong, a DLPyError is raised with an appropriate message.

    '''

    try:
        import soundfile as sf
    except (ModuleNotFoundError, ImportError):
        raise DLPyError('cannot import soundfile')

    audio_name = os.path.basename(local_audio_file)
    output_dir = os.path.dirname(converted_local_audio_file)
    required_sr = 16000
    required_sw = 2

    # check whether the audio file is a wave format
    audio_ext = os.path.splitext(audio_name)[-1]
    audio_name = os.path.splitext(audio_name)[0]
    if audio_ext.lower() != '.wav':
        audio_wav_file = os.path.join(output_dir, random_name(audio_name, 6) + '.wav')
        data, sampling_rate = sf.read(local_audio_file)
        sf.write(audio_wav_file, data, sampling_rate)
    else:
        audio_wav_file = local_audio_file

    # convert the wav file to the required format: 1 channel, 16 bits, and 16 kHz
    wave_reader, wave_params = read_audio(audio_wav_file)
    is_framerate_desired = check_framerate(wave_params, required_sr)
    is_sampwidth_desired = check_sampwidth(wave_params, required_sw)
    is_stereo = check_stereo(wave_params)

    if converted_local_audio_file == audio_wav_file:
        real_converted_local_audio_file = converted_local_audio_file + '.tmp'
    else:
        real_converted_local_audio_file = converted_local_audio_file

    with wave.open(real_converted_local_audio_file, "wb") as wave_writer:
        wave_writer.setnchannels(1)
        wave_writer.setframerate(required_sr)
        # 16 bits
        wave_writer.setsampwidth(2)
        wave_writer.setcomptype(wave_params.comptype, wave_params.compname)
        fragment = wave_reader.readframes(wave_params.nframes)

        # 1 channel
        if is_stereo:
            fragment = convert_stereo_to_mono(fragment, wave_params.sampwidth)

        # 16 kHz
        if not is_framerate_desired:
            fragment = convert_framerate(fragment, wave_params.sampwidth, 1,
                                         wave_params.framerate, required_sr)

        # 16 bits
        if not is_sampwidth_desired:
            fragment = convert_sampwidth(fragment, wave_params.sampwidth,
                                         required_sw)

        wave_writer.writeframes(fragment)

    wave_reader.close()

    # remove the temporary wav file
    if audio_wav_file != local_audio_file:
        os.remove(audio_wav_file)

    # rename the file to the desired one
    if real_converted_local_audio_file != converted_local_audio_file:
        os.replace(real_converted_local_audio_file, converted_local_audio_file)
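
A minimal usage sketch; the file paths are placeholders, and the soundfile package is only needed when the input is not already a WAV file.

# Convert a hypothetical MP3 recording to 16 kHz, 16-bit, mono WAV
convert_one_audio_file('/data/audio/interview.mp3',
                       '/data/audio/interview_16k_mono.wav')
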
Example #6
def extract_pytorch_parms(pymodel,
                          layer_name,
                          layer_type,
                          layer_dim,
                          layer_keywords,
                          extra_keywords=None):
    '''
    Extract correct tensor(s) from a PyTorch model state dictionary

    Parameters
    ----------
    pymodel : PyTorch model
        Specifies the PyTorch model object.
    layer_name : string
        Specifies the PyTorch layer name.
    layer_type : int
        Specifies the layer type (see BertCommon for layer types).
    layer_dim : list of int
        Specifies the dimensions of the tensor.
    layer_keywords : list of strings
        Specifies the keywords to search for in the model dictionary.
    extra_keywords : list of strings or None, optional
        Specifies the extra keywords for a multi-head attention layer.
        This is mandatory for multi-head attention and any other layer(s)
        where there could be ambiguity between two layers with the same
        type.
        Default : None

    Returns
    -------
    weight, bias parameters

    '''

    ptensor_wgt = None
    ptensor_bias = None

    if layer_type == BertCommon['layer_types']['noparms']:
        pass
    elif layer_type == BertCommon['layer_types']['layernorm']:
        if extra_keywords is None:
            key_list = layer_keywords + ['weight']
        else:
            key_list = layer_keywords + extra_keywords + ['weight']

        # weights
        pname, ptensor_wgt = find_pytorch_tensor(pymodel, key_list, layer_dim)
        if pname is None:
            raise DLPyError('Cannot find weights for layer ' + layer_name)
        # bias
        key_list[-1] = 'bias'
        pname, ptensor_bias = find_pytorch_tensor(pymodel, key_list, layer_dim)
        if pname is None:
            print('NOTE: No bias for layer ' + layer_name)
    elif layer_type == BertCommon['layer_types']['dense']:
        if extra_keywords is None:
            key_list = layer_keywords + ['weight']
        else:
            key_list = layer_keywords + extra_keywords + ['weight']

        # weights
        pname, ptensor_wgt = find_pytorch_tensor(pymodel, key_list, layer_dim)
        if pname is None:
            raise DLPyError('Cannot find weights for layer ' + layer_name)
        # bias
        # NOTE: bias name and dimensions not unique in attention layer so construct bias tensor name from
        # weight tensor name
        bias_str = pname[0].replace('weight', 'bias')
        if bias_str in pymodel.state_dict():
            ptensor_bias = [pymodel.state_dict()[bias_str].numpy()]
        else:
            print('NOTE: No bias for layer ' + layer_name)
    elif layer_type == BertCommon['layer_types']['mhattention']:

        # NOTE: for affine transformations, Pytorch uses a linear layer that implements
        # the following operation
        #
        # y = x*A^T + b
        #
        # where A is stored in the form (output_dimension, input_dimension) in the state dictionary
        #
        # SAS Deep Learning implements an affine transformation as follows
        #
        # y = A*x + b
        #
        # where A is stored in the form (output_dimension, input_dimension) in column-major order.
        #
        # For dense (fully-connected) layers, the key is the A matrix which is stored identically
        # in both cases so it can be imported directly without any manipulation.  The Pytorch version of
        # Multi-head attention uses several linear layers in the implementation.  The SAS Deep Learning
        # version is self-contained, and implements something like
        #
        # y = x*A + b
        #
        # for these linear layers.  This means that the A matrices used by the SAS Deep Learning version of
        # multi-head attention must be transposed before importing.

        for ii, mha_keys in enumerate(extra_keywords):

            # weights
            pname, tmp_wgt = find_pytorch_tensor(
                pymodel, layer_keywords + mha_keys + ['weight'], layer_dim)
            if pname is None:
                raise DLPyError('Cannot find ' + str(mha_keys) +
                                ' weights for layer ' + layer_name)
            else:
                if len(tmp_wgt) > 1:
                    raise DLPyError('Multiple matches for ' + str(mha_keys) +
                                    ' weights for layer ' + layer_name)
                else:
                    if ii == 0:
                        tensor_wgt = np.transpose(tmp_wgt[0].copy())
                    else:
                        tensor_wgt = np.concatenate(
                            (tensor_wgt, np.transpose(tmp_wgt[0])), axis=1)

            # bias
            pname, tmp_bias = find_pytorch_tensor(
                pymodel, layer_keywords + mha_keys + ['bias'], [layer_dim[0]])
            if pname is None:
                print('NOTE: No ' + str(mha_keys) + ' bias for layer ' +
                      layer_name)
            else:
                if 'tensor_bias' in locals():
                    tensor_bias = np.concatenate((tensor_bias, tmp_bias[0]))
                else:
                    tensor_bias = tmp_bias[0].copy()

        ptensor_wgt = [tensor_wgt]
        if 'tensor_bias' in locals():
            ptensor_bias = [tensor_bias]

    else:
        raise DLPyError('Layer ' + layer_name +
                        ' is an unsupported layer type')

    return ptensor_wgt, ptensor_bias
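
A hedged sketch of pulling the LayerNorm parameters of the first encoder block out of a HuggingFace BERT model; the layer name and keyword list below are illustrative (anything that uniquely matches the state-dictionary entry works), and BertCommon and find_pytorch_tensor come from this module.

from transformers import BertModel          # assumed third-party dependency

bert = BertModel.from_pretrained('bert-base-uncased')
hidden_size = bert.config.hidden_size       # 768 for bert-base

wgt, bias = extract_pytorch_parms(
    bert,
    layer_name='encoder0_output_layernorm',
    layer_type=BertCommon['layer_types']['layernorm'],
    layer_dim=[hidden_size],
    layer_keywords=['encoder.layer.0.output.LayerNorm'])
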
Example #7
def bert_prepare_data(conn,
                      tokenizer,
                      max_seq_len,
                      input_a,
                      segment_vocab_size=None,
                      input_b=None,
                      target=None,
                      obs_weight=None,
                      extra_var=None,
                      neutral_label=None,
                      train_fraction=None,
                      classification_problem=True,
                      seed=777777777,
                      verbose=False):
    '''
    Prepare data for a BERT model variant

    Parameters
    ----------
    conn : CAS Connection
        Specifies the CAS connection
    tokenizer : :class:PreTrainedTokenizer object
        Specifies the tokenizer.
    max_seq_len: int
        Specifies the maximum sequence length (maximum number of tokens).
    input_a : list of strings
        Specifies the text data for a single segment task.
    segment_vocab_size : int
        Specifies the segment vocabulary size.  The value should be
        one of 0 for DistilBERT, 1 for RoBERTa, or 2 for BERT.  Although
        the parameter defaults to None, a value must be supplied.
        Default: None
    input_b : list of strings, optional
        Specifies the text data for a two segment task.
        Default: None
    target: list or list of lists, optional
        Specifies the target data.  Target data must be a numeric type.
        This means that nominal values must be translated to integer class levels.
        Default: None
    obs_weight: list of float/integers
        Specifies the observation weights.
        Default: None
    extra_var: list of dictionaries
        Specifies the extra variable(s) to include in the Viya table(s).
        Each dictionary in the list must have the following keys
            name: string, specifies the name of the extra variable
            values: list, specifies the variable values
            type: string, must be either VARCHAR for character values or NUMERIC for numeric values
        Default: None
    neutral_label: string, optional
        Specifies the "don't care" or neutral target label for multi-target classification tasks.
        This parameter is required if target is a list of lists.
        Default: None
    train_fraction: float, optional
        Specifies the fraction of the data used for training.  Must be between 0.0 and 1.0.
        Default: None
    classification_problem: boolean, optional
        Specifies whether the data is for a classification or regression problem.
        Default: True
    seed: int, optional
        Specifies the seed to use for the random number generator for splitting data into
        train and test data sets.
        Default: 777777777
    verbose: boolean, optional
        Specifies whether progress messages and summary statistics are displayed.
        Default: False
        
    Returns
    -------
    tuple
        The number of target variables (None if no target is specified), followed by
        the names of the Viya tables that hold the training and test/validation data
        sets when train_fraction is specified, or the name of the single data set
        otherwise.

    '''

    # define input variables
    ds_vars = dict(token_var=BertCommon['variable_names']['token_var'],
                   position_var=BertCommon['variable_names']['position_var'],
                   segment_var=BertCommon['variable_names']['segment_var'])

    # error checking
    if not isinstance(input_a, list):
        raise DLPyError('Input A must be a list')

    if input_b is not None:
        if not isinstance(input_b, list):
            raise DLPyError('Input B must be a list')

        if len(input_a) != len(input_b):
            raise DLPyError("Mismatch in lengths of input A and input B lists")

    if target is not None:
        if not isinstance(target, list):
            raise DLPyError('Target must be a list')

        if len(input_a) != len(target):
            raise DLPyError("Mismatch in lengths of input A and target lists")

        # target variable and length variable
        ds_vars['target_var'] = BertCommon['variable_names']['target_var']
        ds_vars['target_len_var'] = BertCommon['variable_names'][
            'target_len_var']

    if obs_weight is not None:
        if not isinstance(obs_weight, list):
            raise DLPyError('Observation weights must be a list')

        if len(input_a) != len(obs_weight):
            raise DLPyError(
                "Mismatch in lengths of input A and observation weight lists")

        if target is None:
            raise DLPyError("Weight specified without target variable.")

        # weight variable
        ds_vars['weight_var'] = BertCommon['variable_names']['weight_var']

    if extra_var is not None:
        extra_var_names = [None] * len(extra_var)
        extra_var_types = [None] * len(extra_var)

        if not isinstance(extra_var, list):
            raise DLPyError('Extra variables must be a list')

        for ii, ev_dict in enumerate(extra_var):
            if not isinstance(ev_dict, dict):
                raise DLPyError(
                    'Argument extra_var must be a list of dictionaries')

            if 'name' in ev_dict:
                extra_var_names[ii] = ev_dict['name']
            else:
                raise DLPyError('extra_var[' + str(ii) +
                                '] missing "name" key.')

            if ('type' in ev_dict) and (ev_dict['type'].upper()
                                        in ['VARCHAR', 'NUMERIC']):
                extra_var_types[ii] = ev_dict['type'].upper()
            else:
                raise DLPyError(
                    'extra_var[' + str(ii) +
                    '] missing "type" key, or an invalid type was specified.')

            if ('values' not in ev_dict) or (
                    not (isinstance(ev_dict['values'], list) and
                         (len(input_a) == len(ev_dict['values'])))):
                raise DLPyError(
                    'extra_var[' + str(ii) +
                    '] missing "values" key, the values are not a list object, '
                    'or there is a mismatch in lengths of input A and values lists.'
                )

    else:
        extra_var_names = None
        extra_var_types = None

    if (train_fraction is not None) and ((train_fraction < 0.0) or
                                         (train_fraction > 1.0)):
        raise DLPyError('train_fraction must be between 0 and 1')

    if segment_vocab_size is None:
        raise DLPyError(
            "You must specify a segment vocabulary size.  See the Bert model "
            "configuration object (e.g. BertConfig['type_vocab_size'] for the "
            "correct value.")
    else:
        if segment_vocab_size not in [0, 1, 2]:
            raise DLPyError('Vocabulary size ' + str(segment_vocab_size) +
                            ' is invalid. '
                            'The value must be 0, 1, or 2.')

    # initialize lists
    token_strings = [None] * len(input_a)
    position_strings = [None] * len(input_a)
    if segment_vocab_size > 0:
        segment_strings = [None] * len(input_a)
    if target is not None:
        target_array = [None] * len(input_a)
        tgtlen_array = [None] * len(input_a)
    if obs_weight is not None:
        weight_array = [None] * len(input_a)
    if extra_var is not None:
        extra_var_array = [None] * len(input_a)

    num_truncated = 0
    obs_idx = 0
    ten_percent = max(1, int(0.1 * len(input_a)))  # guard against modulo by zero for very small data sets
    multiple_targets = False
    for ii, txt_a in enumerate(input_a):

        # provide feedback
        if verbose:
            if (ii > 0) and (ii % ten_percent == 0):
                print("NOTE: " + str(int(round(ii * 100.0 / len(input_a)))) +
                      "% of the observations tokenized.")

        # simple data cleaning, skip observations where input A is invalid
        if len(txt_a) == 0:
            continue
        else:
            txt_a_untok = txt_a
            txt_a = tokenizer.tokenize(txt_a)
            txt_a = txt_a[:min([
                max_seq_len, len(txt_a)
            ])]  # NOTE: this suppresses an unnecessary logger warning

        # simple data cleaning, skip observations where input B is invalid
        if input_b is not None:
            txt_b = input_b[ii]
            txt_b_untok = txt_b
            if len(txt_b) == 0:
                continue
            else:
                txt_b = tokenizer.tokenize(txt_b)
                txt_b = txt_b[:min([
                    max_seq_len, len(txt_b)
                ])]  # NOTE: this suppresses an unnecessary logger warning
        else:
            txt_b = None
            txt_b_untok = None

        # simple data cleaning, skip observations where target is invalid (i.e. not numeric data)
        if target is not None:
            cur_tgt = target[ii]

            if isinstance(cur_tgt, list):
                tst_val = cur_tgt[0]
            else:
                tst_val = cur_tgt

            if not isinstance(tst_val, Number):
                continue
        else:
            cur_tgt = None

        # observation weight
        if obs_weight is not None:
            cur_wgt = obs_weight[ii]
            if not isinstance(cur_wgt, Number):
                raise DLPyError('Observation weights must be a numeric type.')
        else:
            cur_wgt = None

        # extra variable(s)
        if extra_var is not None:
            cur_extra_var = [None] * len(extra_var)
            for jj, ev_dict in enumerate(extra_var):
                cur_extra_var[jj] = ev_dict['values'][ii]
        else:
            cur_extra_var = None

        # tokenize text
        txt_encoding = tokenizer.encode_plus(txt_a,
                                             text_pair=txt_b,
                                             add_special_tokens=True,
                                             return_special_tokens_mask=True,
                                             max_length=max_seq_len)
        tmp_tokenized_text = tokenizer.convert_ids_to_tokens(
            txt_encoding['input_ids'])

        # set segment ID
        if segment_vocab_size == 2:
            seg_idx = txt_encoding['token_type_ids']
        elif segment_vocab_size == 1:
            seg_idx = [0] * len(tmp_tokenized_text)
        else:
            seg_idx = None

        # check for truncated sequence(s)
        if 'num_truncated_tokens' in txt_encoding:
            num_truncated += 1

        # tokenization error-checking
        num_tokens = len(tmp_tokenized_text)
        tokenized_text = [None] * num_tokens
        for jj in range(num_tokens):

            if tmp_tokenized_text[jj] in BertCommon['reserved_names']:
                raise DLPyError('Match for reserved names: ' +
                                tmp_tokenized_text[jj])
            elif tmp_tokenized_text[jj] in BertCommon['special_chars']:
                tokenized_text[jj] = '[' + tmp_tokenized_text[jj] + ']'
            else:
                tokenized_text[jj] = tmp_tokenized_text[jj]

        # verify targets match inputs for sequence labeling tasks (assume single segment only for now)
        if isinstance(cur_tgt, list):
            multiple_targets = True

            if neutral_label is None:
                raise DLPyError(
                    "Neutral label must be specified for sequence labeling tasks."
                )

            if txt_b_untok is None:
                num_words = len(txt_a_untok.split(
                    BertCommon['text_delimiter']))
            else:
                num_words = (
                    len(txt_a_untok.split(BertCommon['text_delimiter'])) +
                    len(txt_b_untok.split(BertCommon['text_delimiter'])))

            num_tgts = len(cur_tgt)
            if num_words != num_tgts:
                raise DLPyError(
                    "Mismatch in length of input/target for observation " +
                    str(ii))

            # tokenization adds special tokens and may split words into multiple tokens.  Add
            # neutral labels for special tokens and repeat target labels for words split by
            # tokenization.
            new_tgt = [
                neutral_label if mask == 1 else None
                for mask in txt_encoding['special_tokens_mask']
            ]
            txt_words = txt_a_untok.split(BertCommon['text_delimiter'])
            if txt_b_untok is not None:
                txt_words += txt_b_untok.split(BertCommon['text_delimiter'])

            idx = 0
            for cur_word, cur_label in zip(txt_words, cur_tgt):
                # skip over special token(s)
                if txt_encoding['special_tokens_mask'][idx] == 1:
                    idx += [
                        jj for jj, val in enumerate(
                            txt_encoding['special_tokens_mask'][idx:])
                        if val == 0
                    ][0]

                word_tokens = tokenizer.tokenize(cur_word)
                new_tgt[idx:idx +
                        len(word_tokens)] = [cur_label] * len(word_tokens)
                idx += len(word_tokens)

            cur_tgt = new_tgt.copy()

        # check for defective observations: a valid observation must have at least the
        # beginning and ending "special" tokens (e.g. [CLS] tok1 tok2 ... [SEP] for
        # a BERT model)
        if sum(txt_encoding['special_tokens_mask']) >= 2:
            token_strings[obs_idx] = BertCommon['text_delimiter'].join(
                tokenized_text)

            # position
            tokenized_position = [None] * num_tokens
            for jj in range(num_tokens):
                tokenized_position[jj] = BertCommon['reserved_names'][
                    'position_prefix'] + str(jj)

            position_strings[obs_idx] = BertCommon['text_delimiter'].join(
                tokenized_position)

            # segment
            if segment_vocab_size > 0:
                tokenized_segment = [None] * num_tokens
                for jj in range(num_tokens):
                    tokenized_segment[jj] = BertCommon['reserved_names'][
                        'segment_prefix'] + str(seg_idx[jj])

                segment_strings[obs_idx] = BertCommon['text_delimiter'].join(
                    tokenized_segment)

            # target
            if cur_tgt is not None:
                if classification_problem:
                    if isinstance(cur_tgt, list):
                        # zero pad target list
                        target_array[obs_idx] = [str(0)] * max_seq_len
                        for jj, tgt in enumerate(cur_tgt):
                            target_array[obs_idx][jj] = str(int(tgt))

                        tgtlen_array[obs_idx] = len(cur_tgt)
                    else:
                        target_array[obs_idx] = str(int(cur_tgt))
                        tgtlen_array[obs_idx] = 1
                else:
                    if isinstance(cur_tgt, list):
                        raise DLPyError(
                            'Multiple regression problems not supported.')
                    else:
                        target_array[obs_idx] = cur_tgt
                        tgtlen_array[obs_idx] = 1

            # weight
            if cur_wgt is not None:
                weight_array[obs_idx] = cur_wgt

            # extra variable(s)
            if cur_extra_var is not None:
                extra_var_array[obs_idx] = cur_extra_var

            # increment the valid observation index
            obs_idx += 1
        else:
            print('WARNING: observation #: ' + str(ii))
            raise DLPyError('Input string could not be tokenized.')

    if verbose:
        print("NOTE: all observations tokenized.\n")

    # reduce lists and inform user if one or more observations discarded
    if obs_idx < len(input_a):
        token_strings = token_strings[0:obs_idx]
        position_strings = position_strings[0:obs_idx]
        if segment_vocab_size > 0:
            segment_strings = segment_strings[0:obs_idx]
        if target is not None:
            target_array = target_array[0:obs_idx]
            tgtlen_array = tgtlen_array[0:obs_idx]
        if obs_weight is not None:
            weight_array = weight_array[0:obs_idx]
        if extra_var is not None:
            extra_var_array = extra_var_array[0:obs_idx]

        print(
            'NOTE: observations with empty/invalid input or targets were discarded.  There are\n'
            '' + str(obs_idx) + ' out of ' + str(len(input_a)) +
            ' observations remaining.\n')

    # inform user if one or more observations truncated
    if num_truncated > 0:
        print(
            'WARNING: ' + str(num_truncated) + ' out of ' + str(len(input_a)) +
            ' observations exceeded the maximum sequence length\n'
            'These observations have been truncated so that only the first ' +
            str(max_seq_len) + ' tokens are used.\n')

    # set up variable names/types
    if segment_vocab_size > 0:
        var_names = [
            ds_vars['token_var'], ds_vars['position_var'],
            ds_vars['segment_var']
        ]
        var_type = ['VARCHAR', 'VARCHAR', 'VARCHAR']
    else:
        var_names = [ds_vars['token_var'], ds_vars['position_var']]
        var_type = ['VARCHAR', 'VARCHAR']

    num_target_var = None
    if target is not None:
        if multiple_targets:
            num_target_var = max_seq_len
        else:
            num_target_var = 1

        var_names += generate_target_var_names(ds_vars, num_target_var)
        var_names += [ds_vars['target_len_var']]
        if classification_problem:
            var_type += ['VARCHAR'] * num_target_var + ['NUMERIC']
        else:
            var_type += ['NUMERIC'] * num_target_var + ['NUMERIC']

    if obs_weight is not None:
        var_names += [ds_vars['weight_var']]
        var_type += ['NUMERIC']

    if extra_var is not None:
        var_names += extra_var_names
        var_type += extra_var_types

    # check whether splitting to training/testing data sets or just a single data set
    if (train_fraction is not None) and (train_fraction > 0.0):
        np.random.seed(seed=seed)
        idx_prob = np.random.uniform(low=0.0, high=1.0, size=(obs_idx, ))
        num_train = 0
        num_test = 0
        for ii in range(obs_idx):
            if idx_prob[ii] < train_fraction:
                num_train += 1
            else:
                num_test += 1

        # split data to train/test data sets
        # token, position, segment
        train_token_strings = [None] * num_train
        train_position_strings = [None] * num_train
        if segment_vocab_size > 0:
            train_segment_strings = [None] * num_train
        #
        test_token_strings = [None] * num_test
        test_position_strings = [None] * num_test
        if segment_vocab_size > 0:
            test_segment_strings = [None] * num_test
        # target
        if target is not None:
            train_target_array = [None] * num_train
            train_tgtlen_array = [None] * num_train
            #
            test_target_array = [None] * num_test
            test_tgtlen_array = [None] * num_test
        # weight
        if obs_weight is not None:
            train_weight_array = [None] * num_train
            #
            test_weight_array = [None] * num_test
        # extra variable(s)
        if extra_var is not None:
            train_extra_var_array = [None] * num_train
            #
            test_extra_var_array = [None] * num_test

        train_idx = 0
        test_idx = 0
        for ii in range(obs_idx):
            if idx_prob[ii] < train_fraction:  # train data set
                train_token_strings[train_idx] = token_strings[ii]
                train_position_strings[train_idx] = position_strings[ii]
                if segment_vocab_size > 0:
                    train_segment_strings[train_idx] = segment_strings[ii]

                # NOTE: each element of train target array may be a value or a list
                if target is not None:
                    train_target_array[train_idx] = target_array[ii]
                    train_tgtlen_array[train_idx] = tgtlen_array[ii]

                if obs_weight is not None:
                    train_weight_array[train_idx] = weight_array[ii]

                # NOTE: each element of train extra var array is a list
                if extra_var is not None:
                    train_extra_var_array[train_idx] = extra_var_array[ii]

                train_idx += 1
            else:  # test data set
                test_token_strings[test_idx] = token_strings[ii]
                test_position_strings[test_idx] = position_strings[ii]
                if segment_vocab_size > 0:
                    test_segment_strings[test_idx] = segment_strings[ii]

                # NOTE: each element of test target array may be a value or a list
                if target is not None:
                    test_target_array[test_idx] = target_array[ii]
                    test_tgtlen_array[test_idx] = tgtlen_array[ii]

                if obs_weight is not None:
                    test_weight_array[test_idx] = weight_array[ii]

                # NOTE: each element of test extra var array is a list
                if extra_var is not None:
                    test_extra_var_array[test_idx] = extra_var_array[ii]

                test_idx += 1

        # create CAS table for training data
        train_data_set = 'bert_train_data'
        if segment_vocab_size > 0:
            dlist = [
                train_token_strings, train_position_strings,
                train_segment_strings
            ]
        else:
            dlist = [train_token_strings, train_position_strings]

        if target is not None:
            if isinstance(train_target_array[0], list):
                for ii in range(len(train_target_array[0])):
                    tmp_array = [
                        train_target_array[jj][ii] for jj in range(train_idx)
                    ]
                    dlist += [tmp_array]
            else:
                dlist += [train_target_array]

            dlist += [train_tgtlen_array]

        if obs_weight is not None:
            dlist += [train_weight_array]

        if extra_var is not None:
            for ii in range(len(train_extra_var_array[0])):
                tmp_array = [
                    train_extra_var_array[jj][ii] for jj in range(train_idx)
                ]
                dlist += [tmp_array]

        if verbose:
            print("NOTE: uploading training data to table " + train_data_set +
                  ".")
            print("NOTE: there are " + str(num_train) +
                  " observations in the training data set.\n")

        handler1 = BertDMH(dlist, var_names, var_type)
        conn.retrieve('table.addtable',
                      _messagelevel='error',
                      table=train_data_set,
                      replace=True,
                      **handler1.args.addtable)

        # create CAS table for test data
        test_data_set = 'bert_test_validation_data'
        if segment_vocab_size > 0:
            dlist = [
                test_token_strings, test_position_strings, test_segment_strings
            ]
        else:
            dlist = [test_token_strings, test_position_strings]

        if target is not None:
            if isinstance(test_target_array[0], list):
                for ii in range(len(test_target_array[0])):
                    tmp_array = [
                        test_target_array[jj][ii] for jj in range(test_idx)
                    ]
                    dlist += [tmp_array]
            else:
                dlist += [test_target_array]

            dlist += [test_tgtlen_array]

        if obs_weight is not None:
            dlist += [test_weight_array]

        if extra_var is not None:
            for ii in range(len(test_extra_var_array[0])):
                tmp_array = [
                    test_extra_var_array[jj][ii] for jj in range(test_idx)
                ]
                dlist += [tmp_array]

        if verbose:
            print("NOTE: uploading test/validation data to table " +
                  test_data_set + ".")
            print("NOTE: there are " + str(num_test) +
                  " observations in the test/validation data set.\n")

        handler2 = BertDMH(dlist, var_names, var_type)
        conn.retrieve('table.addtable',
                      _messagelevel='error',
                      table=test_data_set,
                      replace=True,
                      **handler2.args.addtable)

        if verbose:
            print("NOTE: training and test/validation data sets ready.\n")

        return num_target_var, train_data_set, test_data_set
    else:
        # single data set
        unified_data_set = 'bert_data'
        if segment_vocab_size > 0:
            dlist = [token_strings, position_strings, segment_strings]
        else:
            dlist = [token_strings, position_strings]

        if target is not None:
            if isinstance(target_array[0], list):
                for ii in range(len(target_array[0])):
                    tmp_array = [target_array[jj][ii] for jj in range(obs_idx)]
                    dlist += [tmp_array]
            else:
                dlist += [target_array]

            dlist += [tgtlen_array]

        if obs_weight is not None:
            dlist += [weight_array]

        if extra_var is not None:
            for ii in range(len(extra_var_array[0])):
                tmp_array = [extra_var_array[jj][ii] for jj in range(obs_idx)]
                dlist += [tmp_array]

        if verbose:
            print("NOTE: uploading data to table " + unified_data_set + ".")
            print("NOTE: there are " + str(obs_idx) +
                  " observations in the data set.\n")

        handler = BertDMH(dlist, var_names, var_type)
        conn.retrieve('table.addtable',
                      _messagelevel='error',
                      table=unified_data_set,
                      replace=True,
                      **handler.args.addtable)

        if verbose:
            print("NOTE: data set ready.\n")

        return num_target_var, unified_data_set
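
A minimal usage sketch for a single-segment classification task; the CAS server, the review texts, and the labels are placeholders, and the HuggingFace transformers tokenizer is an assumed dependency.

import swat
from transformers import BertTokenizer     # assumed third-party dependency

conn = swat.CAS('cas-host.example.com', 5570)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

reviews = ['the movie was great', 'terrible plot and acting', 'not bad at all']
labels = [1, 0, 1]                          # integer class levels

num_tgt_var, train_tbl, test_tbl = bert_prepare_data(conn,
                                                     tokenizer,
                                                     max_seq_len=128,
                                                     input_a=reviews,
                                                     target=labels,
                                                     segment_vocab_size=2,  # 2 for BERT
                                                     train_fraction=0.8)
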
def write_keras_hdf5(model, rnn_support, hdf5_out):
    '''
    Generate an HDF5 file with trained model parameters given a Keras definition

    Parameters
    ----------
    model : Keras model
       Keras deep learning model
    rnn_support : boolean
       Indicates whether importing RNN models is supported
    hdf5_out : string
       Fully qualified file name of SAS-compatible HDF5 file

    '''
    # open output file
    try:
        f_out = h5py.File(hdf5_out, 'w')
    except IOError:
        raise DLPyError('The specified file cannot be written: ' + hdf5_out)

    model_type = None
    use_gpu = None
    try:
        # determine model type
        # NOTE: must check ALL RNN layers to determine
        #       whether model must run on GPU
        gpu_layers = []
        cpu_layers = []
        for layer in model.layers:
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                # check for RNN layers
                if class_name in rnn_layer_classes:
                    model_type = 'RNN'
                    image_data_format = None
                    if class_name in rnn_gpu_layer_classes:
                        gpu_layers.append(True)
                    elif class_name in rnn_cpu_layer_classes:
                        cpu_layers.append(True)

        # verify that model is supported by SAS Deep Learning
        if model_type == 'RNN':
            if rnn_support:
                if (len(gpu_layers) > 0) and (len(cpu_layers) == 0):
                    use_gpu = True
                elif (len(gpu_layers) == 0) and (len(cpu_layers) > 0):
                    use_gpu = False
                elif (len(gpu_layers) > 0) and (len(cpu_layers) > 0):
                    raise DLPyError('A mixture of CPU and GPU layers was detected. '
                                    'This is not supported by SAS Deep Learning.')
            else:
                raise DLPyError('RNN model detected: your Viya deployment does not support '
                                'importing an RNN model.')
                
        if model_type is None:
            found_cnn_layer = False
            for layer in model.layers:
                class_name, sublayers = remove_layer_wrapper(layer)
                for tlayer in sublayers:
                    # check for CNN layers
                    if class_name in conv_layer_classes:
                        model_type = 'CNN'
                        image_data_format = K.image_data_format()
                        found_cnn_layer = True
                
                if found_cnn_layer:
                    break

        if model_type is None:
            raise DLPyError('Only RNN and CNN models are currently supported.')

        # determine layers with weights
        filtered_layers = []
        filtered_layer_names = []
        for layer in model.layers:
            weights = layer.weights
            if weights:
                filtered_layers.append(layer)
                filtered_layer_names.append(layer.name)
                
        # determine permutation vector associated with flattening layer (if it exists)
        if model_type == 'CNN':
            flatten_layer_index = -1
            index = 0
            for layer in model.layers:
                if layer.__class__.__name__.lower() == 'flatten':
                    flatten_layer_index = index
                    break
                index = index + 1

            if flatten_layer_index != -1:
                layer = model.layers[flatten_layer_index]
                permute_layer_name = model.layers[flatten_layer_index + 1].name
                if image_data_format == 'channels_first':
                    C, H, W = (layer.input_shape)[1:]
                else:
                    H, W, C = (layer.input_shape)[1:]
                N = (layer.output_shape)[1]
                perm_index = [0] * N
                if image_data_format == 'channels_last':
                    ii = 0
                    for cc in range(C):
                        for hh in range(H):
                            for ww in range(W):
                                perm_index[ii] = hh * W * C + ww * C + cc
                                ii = ii + 1
                else:
                    for nn in range(N):
                        perm_index[nn] = nn
            else:
                perm_index = []
                permute_layer_name = None
        else:
            perm_index = []
            permute_layer_name = None

        # populate attributes with layer names
        attrib_layer_names = []
        for name in filtered_layer_names:
            layer = model.get_layer(name=name)
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                attrib_layer_names.append(tlayer.name)
                            
        f_out.attrs['layer_names'] = [replace_forward_slash(l).encode('utf8') for l in attrib_layer_names]            
        # let Keras read weights, reformat, and write to SAS-compatible file
        for k, layer in enumerate(filtered_layers):
            symbolic_weights = layer.weights
            weight_values = K.batch_get_value(symbolic_weights)
            weight_names = []
            for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
                if hasattr(w, 'name') and w.name:
                    name = str(w.name)
                else:
                    name = 'param_' + str(i)
                #weight_names.append(name.encode('utf8'))
                weight_names.append(name)

            # layer modification from here:
            new_weight_names = []

            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) +
                                 ' (named "' + layer.name +
                                 '" in the current model) was found to '
                                 'correspond to layer ' + name +
                                 ' in the saved file. '
                                 'However the new layer ' + layer.name +
                                 ' expects ' + str(len(symbolic_weights)) +
                                 ' weights, but the saved weights have ' +
                                 str(len(weight_values)) +
                                 ' elements.')
                                 
            # create CPU-compatible layer
            cpu_layer = create_cpu_compatible_layer(layer, model_type)
                                 
            # read/write weights
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                g_out = f_out.create_group(replace_forward_slash(tlayer.name))
                new_weight_names = []
                wgt_idx = 0
            
                for ii,wgt_name in enumerate(weight_names):
                    if tlayer.name in wgt_name:
                        if type(weight_values[ii]) == np.ndarray:
                            tensor_in = weight_values[ii]
                        else:
                            tensor_in = np.zeros(weight_values[ii].shape,
                                                dtype=weight_values[ii].dtype)
                            weight_values[ii].read_direct(tensor_in)

                        # permute axes as needed to conform to SAS deep
                        # learning "channels first" format
                        if (image_data_format is not None) and (image_data_format == 'channels_first'):
                            # format: (C,fdim1, fdim2, fdim3) ==> (C,fdim3,fdim1,fdim2)
                            if len(tensor_in.shape) == 4:
                                tensor_out = np.transpose(tensor_in, (0, 3, 1, 2))
                            else:
                                tensor_out = tensor_in.copy()
                        else:
                            # "channels last" format or not image processing problem
                            
                            # process RNN layers first
                            if class_name in rnn_layer_classes:
                                cpu_class_name, cpu_sublayers = remove_layer_wrapper(cpu_layer)
                                if (len(tensor_in.shape) == 1) and (class_name != cpu_class_name):
                                    tensor_out = np.tile(0.5 * tensor_in, 2)
                                else:
                                    tensor_out = tensor_in.copy()
                            # not an RNN layer, but this is a vector - nothing to permute
                            elif len(tensor_in.shape) == 1:
                                tensor_out = tensor_in.copy()
                            else:
                                # permute Conv2D tensor to "channels_first" format
                                if class_name == 'conv2d':
                                    tensor_out = np.transpose(tensor_in, (3, 2, 0, 1))
                                # have to account for neuron ordering in first dense
                                # layer following flattening operation
                                elif class_name == 'dense':
                                    if (permute_layer_name is not None) and (tlayer.name == permute_layer_name):
                                        tensor_out = np.zeros(tensor_in.shape)
                                        for jj in range(tensor_out.shape[0]):
                                            tensor_out[jj, :] = tensor_in[perm_index[jj], :]
                                    else:  # not following flattening, just copy
                                        tensor_out = tensor_in.copy()

                                    # mimic Caffe layout
                                    tensor_out = np.transpose(tensor_out, (1, 0))

                        # save weight in format amenable to SAS
                        dset_name = generate_dataset_name(tlayer, wgt_idx)
                        wgt_idx = wgt_idx + 1
                        new_weight_names.append(dset_name)
                        g_out.create_dataset(dset_name, data=tensor_out)

                # update weight names
                g_out.attrs['weight_names'] = new_weight_names

    except ValueError as err_msg:
        print(err_msg)

    finally:
        # close files
        f_out.close()

    return use_gpu
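
A minimal sketch of converting a small CNN, assuming Keras 2.1.x with the TensorFlow backend and that the helpers used by this module (h5py, remove_layer_wrapper, generate_dataset_name, and so on) are importable; the output path is a placeholder.

from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense

toy = Sequential()
toy.add(Conv2D(8, (3, 3), activation='relu', input_shape=(32, 32, 3)))
toy.add(Flatten())
toy.add(Dense(10, activation='softmax'))

# Write the (randomly initialized) parameters to a SAS-compatible HDF5 file
use_gpu = write_keras_hdf5(toy, rnn_support=False,
                           hdf5_out='/tmp/toy_sas_weights.h5')
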
def write_keras_hdf5_from_file(model, rnn_support, hdf5_in, hdf5_out):
    '''
    Generate an HDF5 file with trained model parameters given a Keras definition

    Parameters
    ----------
    model : Keras model
       Keras deep learning model
    rnn_support : boolean
       Indicates whether importing RNN models is supported
    hdf5_in : string
       Fully qualified file name of Keras HDF5 file
    hdf5_out : string
       Fully qualified file name of SAS-compatible HDF5 file

    '''
    # open input/output files
    if os.path.isfile(hdf5_in):
        f_in = h5py.File(hdf5_in, 'r')
        try:
            f_out = h5py.File(hdf5_out, 'w')
        except IOError:
            raise DLPyError('The specified file cannot be written: ' + hdf5_out)
    else:
        raise DLPyError('The specified file does not exist: ' + hdf5_in)

    if 'keras_version' in f_in.attrs:
        original_keras_version = f_in.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'

    if 'backend' in f_in.attrs:
        original_backend = f_in.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    model_type = None
    use_gpu = None
    try:
        # determine model type
        # NOTE: must check ALL RNN layers to determine
        #       whether model must run on GPU
        gpu_layers = []
        cpu_layers = []
        for layer in model.layers:
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                # check for RNN layers
                if class_name in rnn_layer_classes:
                    model_type = 'RNN'
                    image_data_format = None
                    if class_name in rnn_gpu_layer_classes:
                        gpu_layers.append(True)
                    elif class_name in rnn_cpu_layer_classes:
                        cpu_layers.append(True)

        # verify that model is supported by SAS Deep Learning
        if model_type == 'RNN':
            if rnn_support:
                if (len(gpu_layers) > 0) and (len(cpu_layers) == 0):
                    use_gpu = True
                elif (len(gpu_layers) == 0) and (len(cpu_layers) > 0):
                    use_gpu = False
                elif (len(gpu_layers) > 0) and (len(cpu_layers) > 0):
                    raise DLPyError('A mixture of CPU and GPU layers was detected. '
                                    'This is not supported by SAS Deep Learning.')
            else:
                raise DLPyError('RNN model detected: your Viya deployment does not support '
                                'importing an RNN model.')
                
        if model_type is None:
            found_cnn_layer = False
            for layer in model.layers:
                class_name, sublayers = remove_layer_wrapper(layer)
                for tlayer in sublayers:
                    # check for CNN layers
                    if class_name in conv_layer_classes:
                        model_type = 'CNN'
                        image_data_format = K.image_data_format()
                        found_cnn_layer = True
                
                if found_cnn_layer:
                    break

        if model_type is None:
            raise DLPyError('Only RNN and CNN models are currently supported.')
            
        # navigate to correct HDF5 group
        if 'layer_names' in f_in.attrs.keys():
            root_group = f_in
        elif 'layer_names' in f_in['model_weights'].attrs.keys():
            root_group = f_in['model_weights']
        else:
            raise DLPyError('Cannot read HDF5 file correctly')
        
        # determine layers with weights
        filtered_layers = []
        for layer in model.layers:
            weights = layer.weights
            if weights:
                filtered_layers.append(layer)

        layer_names = [n.decode('utf8') for n in root_group.attrs['layer_names']]
        filtered_layer_names = []
        for name in layer_names:
            g = root_group[name]
            weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
            if weight_names:
                filtered_layer_names.append(name)
        
        layer_names = filtered_layer_names
        if len(layer_names) != len(filtered_layers):
            raise ValueError('You are trying to load a weight file '
                             'containing ' + str(len(layer_names)) +
                             ' layers into a model with ' +
                             str(len(filtered_layers)) + ' layers.')

        # determine permutation vector associated with flattening layer (if it exists)
        if model_type == 'CNN':
            flatten_layer_index = -1
            index = 0
            for layer in model.layers:
                if layer.__class__.__name__.lower() == 'flatten':
                    flatten_layer_index = index
                    break
                index = index + 1

            if flatten_layer_index != -1:
                layer = model.layers[flatten_layer_index]
                permute_layer_name = model.layers[flatten_layer_index + 1].name
                if image_data_format == 'channels_first':
                    C, H, W = (layer.input_shape)[1:]
                else:
                    H, W, C = (layer.input_shape)[1:]
                N = (layer.output_shape)[1]
                perm_index = [0] * N
                if image_data_format == 'channels_last':
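                    # Keras flattens channels_last data in (H, W, C) order, while SAS
                    # deep learning expects (C, H, W) ordering; map each channels-first
                    # neuron index to its corresponding channels-last index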
                    ii = 0
                    for cc in range(C):
                        for hh in range(H):
                            for ww in range(W):
                                perm_index[ii] = hh * W * C + ww * C + cc
                                ii = ii + 1
                else:
                    for nn in range(N):
                        perm_index[nn] = nn
            else:
                perm_index = []
                permute_layer_name = None
        else:
            perm_index = []
            permute_layer_name = None

        # populate attributes with layer names
        attrib_layer_names = []
        for name in layer_names:
            layer = model.get_layer(name=name)
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                attrib_layer_names.append(tlayer.name)
                            
        f_out.attrs['layer_names'] = [replace_forward_slash(l).encode('utf8') for l in attrib_layer_names]
        # let Keras read weights, reformat, and write to SAS-compatible file
        for k, name in enumerate(layer_names):
            g_in = root_group[name]
            layer = filtered_layers[k]

            weight_names = [n.decode('utf8') for n in g_in.attrs['weight_names']]
            weight_values = [g_in[weight_name] for weight_name in weight_names]
            symbolic_weights = layer.weights
                        
            # create CPU-compatible layer
            cpu_layer = create_cpu_compatible_layer(layer, model_type)
                                        
            # use Keras to load/preprocess weights
            weight_values = preprocess_weights_for_loading(cpu_layer,
                                                           weight_values,
                                                           original_keras_version,
                                                           original_backend)
                                                                   
            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) +
                                 ' (named "' + layer.name +
                                 '" in the current model) was found to '
                                 'correspond to layer ' + name +
                                 ' in the saved file. '
                                 'However the new layer ' + layer.name +
                                 ' expects ' + str(len(symbolic_weights)) +
                                 ' weights, but the saved weights have ' +
                                 str(len(weight_values)) +
                                 ' elements.')
            if layer.__class__.__name__.lower() == 'batchnormalization':
                bn_gamma = np.ones(weight_values[0].shape,
                                   dtype=weight_values[0].dtype)
                bn_beta = np.zeros(weight_values[0].shape,
                                   dtype=weight_values[0].dtype)
                                   
                layer_config = layer.get_config()
                
                # if scale = False and center = True
                if not layer_config['scale'] and layer_config['center']:
                    weight_values.insert(0, bn_gamma)
                    weight_names.insert(0, replace_forward_slash(layer.name)+'/'+'gamma:0')
                # if scale = True and center = False
                elif layer_config['scale'] and not layer_config['center']:
                    weight_values.insert(1, bn_beta)
                    weight_names.insert(1, replace_forward_slash(layer.name)+'/'+'beta:0')
                # if scale = False and center = False
                elif not layer_config['scale'] and not layer_config['center']:
                    weight_values = [bn_gamma, bn_beta] + weight_values
                    weight_names = [replace_forward_slash(layer.name)+'/'+'gamma:0', 
                                    replace_forward_slash(layer.name)+'/'+'beta:0'] + weight_names
                                    
                # add epsilon to variance values to avoid divide by zero
                if 'epsilon' in layer_config.keys():
                    for ii,wgt_name in enumerate(weight_names):
                        if 'moving_variance' in wgt_name:
                            weight_values[ii] = weight_values[ii] + (layer_config['epsilon']*
                                                                     np.ones(weight_values[ii].shape,
                                                                             dtype=weight_values[ii].dtype))
                    
            # read/write weights
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                g_out = f_out.create_group(replace_forward_slash(tlayer.name))
                new_weight_names = []
                wgt_idx = 0
            
                for ii,wgt_name in enumerate(weight_names):
                    if tlayer.name in wgt_name:
                        if type(weight_values[ii]) == np.ndarray:
                            tensor_in = weight_values[ii]
                        else:
                            tensor_in = np.zeros(weight_values[ii].shape,
                                                dtype=weight_values[ii].dtype)
                            weight_values[ii].read_direct(tensor_in)

                        # permute axes as needed to conform to SAS deep
                        # learning "channels first" format
                        if (image_data_format is not None) and (image_data_format == 'channels_first'):
                            # format: (C,fdim1, fdim2, fdim3) ==> (C,fdim3,fdim1,fdim2)
                            if len(tensor_in.shape) == 4:
                                tensor_out = np.transpose(tensor_in, (0, 3, 1, 2))
                            else:
                                tensor_out = tensor_in.copy()
                        else:
                            # "channels last" format or not image processing problem
                            
                            # process RNN layers first
                            if class_name in rnn_layer_classes:
                                cpu_class_name, cpu_sublayers = remove_layer_wrapper(cpu_layer)
                                if (len(tensor_in.shape) == 1) and (class_name != cpu_class_name):
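                                    # presumably a CuDNN variant mapped to a CPU layer:
                                    # split the single bias into two halved copies so
                                    # that their sum reproduces the original bias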
                                    tensor_out = np.tile(0.5 * tensor_in, 2)
                                else:
                                    tensor_out = tensor_in.copy()
                            # not an RNN layer, but this is a vector - nothing to permute
                            elif len(tensor_in.shape) == 1:
                                tensor_out = tensor_in.copy()
                            else:
                                # permute Conv2D tensor to "channels_first" format
                                if class_name == 'conv2d':
                                    tensor_out = np.transpose(tensor_in, (3, 2, 0, 1))
                                # have to account for neuron ordering in first dense
                                # layer following flattening operation
                                elif class_name == 'dense':
                                    if (permute_layer_name is not None) and (tlayer.name == permute_layer_name):
                                        tensor_out = np.zeros(tensor_in.shape)
                                        for jj in range(tensor_out.shape[0]):
                                            tensor_out[jj, :] = tensor_in[perm_index[jj], :]
                                    else:  # not following flattening, just copy
                                        tensor_out = tensor_in.copy()

                                    # mimic Caffe layout
                                    tensor_out = np.transpose(tensor_out, (1, 0))

                        # save weight in format amenable to SAS
                        dset_name = generate_dataset_name(tlayer, wgt_idx)
                        wgt_idx = wgt_idx + 1
                        new_weight_names.append(dset_name)
                        g_out.create_dataset(dset_name, data=tensor_out)

                # update weight names
                g_out.attrs['weight_names'] = new_weight_names

    except ValueError as err_msg:
        print(err_msg)

    finally:
        # close files
        f_out.close()
        f_in.close()
        
    return use_gpu
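
A minimal usage sketch (hypothetical paths; it assumes the Keras model definition and its trained weights are stored in the same HDF5 file):

# Hypothetical sketch: convert Keras-trained weights into a SAS-compatible HDF5 file.
from keras.models import load_model

keras_model = load_model('/tmp/captioning_model.h5')
needs_gpu = write_keras_hdf5_from_file(keras_model,
                                       rnn_support=True,
                                       hdf5_in='/tmp/captioning_model.h5',
                                       hdf5_out='/tmp/captioning_model_sas.h5')
# needs_gpu is True or False for RNN models and None for CNN models
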
Exemplo n.º 10
0
    def build_embedding_model(cls,
                              branch,
                              model_table=None,
                              embedding_model_type='Siamese',
                              embedding_layer=None,
                              margin=None):
        '''

        Build an embedding model based on a given model branch and model type

        Parameters
        ----------
        branch : Model
            Specifies the base model that is used as branches for embedding model.
        model_table : string or dict or CAS table, optional
            Specifies the CAS table to store the deep learning model.
            Default: None
        embedding_model_type : string, optional
            Specifies the embedding model type that the created model will be trained for.
            Valid values: Siamese, Triplet, and Quartet.
            Default: Siamese
        embedding_layer : Layer, optional
            Specifies a dense layer as the embedding layer. For instance, Dense(n=10, act='identity') defines
            an embedding dimension of 10. When it is not given, the last layer (excluding the task layers)
            in the branch model is used as the embedding layer.
        margin : double, optional
            Specifies the margin value used by the embedding model. When it is not given, the margin defaults
            to 2.0 for Siamese models and 0.0 otherwise.

        Returns
        -------
        :class:`Model`

        '''

        # check the branch type
        if not isinstance(branch, Model):
            raise DLPyError('The branch option must contain a valid model')

        # the branch must be built using functional APIs
        # only functional model has the attr output_layers
        if not hasattr(branch, 'output_layers'):
            print("NOTE: Convert the branch model into a functional model.")
            branch_tensor = branch.to_functional_model()
        else:
            branch_tensor = deepcopy(branch)

        # always reset this local tensor to 0
        branch_tensor.number_of_instances = 0

        # the branch cannot contain other task layers
        if len(branch_tensor.output_layers) != 1:
            raise DLPyError(
                'The branch model cannot contain more than one output layer')
        elif branch_tensor.output_layers[0].type == OutputLayer.type or \
                branch_tensor.output_layers[0].type == Keypoints.type:
            print("NOTE: Remove the task layers from the model.")
            branch_tensor.layers.remove(branch_tensor.output_layers[0])
            branch_tensor.output_layers[0] = branch_tensor.layers[-1]
        elif branch_tensor.output_layers[0].can_be_last_layer:
            raise DLPyError(
                'The branch model cannot contain a task layer other than an output or keypoints layer.'
            )

        # check embedding_model_type
        if embedding_model_type.lower() not in [
                'siamese', 'triplet', 'quartet'
        ]:
            raise DLPyError('Only Siamese, Triplet, and Quartet are valid.')

        if embedding_model_type.lower() == 'siamese':
            if margin is None:
                margin = 2.0
            cls.number_of_branches = 2
        elif embedding_model_type.lower() == 'triplet':
            if margin is None:
                margin = 0.0
            cls.number_of_branches = 3
        elif embedding_model_type.lower() == 'quartet':
            if margin is None:
                margin = 0.0
            cls.number_of_branches = 4

        cls.embedding_model_type = embedding_model_type

        # build the branches
        input_layers = []
        branch_layers = []
        for i_branch in range(cls.number_of_branches):
            temp_input_layer = Input(**branch_tensor.layers[0].config,
                                     name=cls.input_layer_name_prefix +
                                     str(i_branch))
            temp_branch = branch_tensor(
                temp_input_layer)  # return a list of tensors
            if embedding_layer:
                temp_embed_layer = deepcopy(embedding_layer)
                temp_embed_layer.name = cls.embedding_layer_name_prefix + str(
                    i_branch)
                temp_branch = temp_embed_layer(temp_branch)
                # change tensor to a list
                temp_branch = [temp_branch]
            else:
                # change the last layer name to the embedding layer name
                temp_branch[
                    -1]._op.name = cls.embedding_layer_name_prefix + str(
                        i_branch)

            if i_branch == 0:
                cls.branch_input_tensor = temp_input_layer
                if len(temp_branch) == 1:
                    cls.branch_output_tensor = temp_branch[0]
                else:
                    cls.branch_output_tensor = temp_branch

            # append these layers to the current branch
            input_layers.append(temp_input_layer)
            branch_layers = branch_layers + temp_branch

        # add the embedding loss layer
        loss_layer = EmbeddingLoss(
            margin=margin, name=cls.embedding_loss_layer_name)(branch_layers)

        # create the model DAG using all the above model information
        model = EmbeddingModel(branch.conn,
                               model_table=model_table,
                               inputs=input_layers,
                               outputs=loss_layer)

        # sharing weights
        # get all layer names from one branch
        num_l = int((len(model.layers) - 1) / cls.number_of_branches)
        br1_name = [i.name for i in model.layers[:num_l - 1]]

        # build the list that contain the shared layers
        share_list = []
        n_id = 0
        n_to = n_id + cls.number_of_branches
        for l in br1_name[1:]:
            share_list.append(
                {l: [l + '_' + str(i + 1) for i in range(n_id + 1, n_to)]})

        # add embedding layers
        share_list.append({
            cls.embedding_layer_name_prefix + str(0): [
                cls.embedding_layer_name_prefix + str(i)
                for i in range(1, cls.number_of_branches)
            ]
        })

        model.share_weights(share_list)

        model.compile()

        # generate data_specs
        if embedding_model_type.lower() == 'siamese':
            cls.data_specs = [
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '0',
                         data=['_image_']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '1',
                         data=['_image_1']),
                DataSpec(type_='numnom',
                         layer=cls.embedding_loss_layer_name,
                         data=['_dissimilar_'])
            ]
        elif embedding_model_type.lower() == 'triplet':
            cls.data_specs = [
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '0',
                         data=['_image_']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '1',
                         data=['_image_1']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '2',
                         data=['_image_2'])
            ]

        elif embedding_model_type.lower() == 'quartet':
            cls.data_specs = [
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '0',
                         data=['_image_']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '1',
                         data=['_image_1']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '2',
                         data=['_image_2']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '3',
                         data=['_image_3'])
            ]

        return model
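
A minimal usage sketch, assuming an active CAS session `conn`, the DLPy Sequential and layer classes, and that this classmethod is called on an EmbeddingModel class (as the constructor call above suggests); all names are illustrative:

# Hypothetical sketch: build a Siamese embedding model from a small CNN branch.
branch = Sequential(conn, model_table='small_cnn_branch')
branch.add(InputLayer(n_channels=3, width=224, height=224))
branch.add(Conv2d(8, width=3))
branch.add(Pooling(width=2))
branch.add(Dense(n=16))
branch.add(OutputLayer(n=2))  # task layer; removed automatically when the branches are built

embed_model = EmbeddingModel.build_embedding_model(
    branch,
    model_table='siamese_model',
    embedding_model_type='Siamese',
    embedding_layer=Dense(n=10, act='identity'))
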
Exemplo n.º 11
0
    def deploy_embedding_model(self,
                               path,
                               output_format='astore',
                               model_type='branch'):
        """
        Deploy the deep learning model to a data file

        Parameters
        ----------
        path : string
            Specifies the location to store the model files.
            If the output_format is set to castable, then the location has to be on the server-side.
            Otherwise, the location has to be on the client-side.
        output_format : string, optional
            Specifies the format of the deployed model.
            When astore is specified, the learned embedding features will be output as well.
            Valid Values: astore, castable, or onnx
            Default: astore
        model_type : string, optional
            Specifies how to deploy the embedding model. "branch" means only one branch model is deployed to extract
            features while "full" means the full model is deployed to extract features for all branches and
            compute the distance metric values for all input data pairs.
            Valid values: branch and full
            Default: branch

        Notes
        -----
        Currently, this function supports sashdat, astore, and onnx formats.

        More information about ONNX can be found at: https://onnx.ai/

        DLPy supports ONNX version >= 1.3.0, and Opset version 8.

        For ONNX format, currently supported layers are convo, pool,
        fc, batchnorm, residual, concat, reshape, and detection.

        If dropout is specified in the model, train the model using
        inverted dropout, which can be specified in :class:`Optimizer`.
        This will ensure the results are correct when running the
        model during test phase.

        Returns
        --------
        :class:`Model` for a branch model when model_type is 'branch'

        """

        if model_type.lower() not in ['branch', 'full']:
            raise DLPyError('Only branch and full are valid.')

        if model_type.lower() == 'full':
            temp_embed_layers = []
            for i_branch in range(self.number_of_branches):
                temp_embed_layers.append(self.embedding_layer_name_prefix +
                                         str(i_branch))
            self.deploy(path=path,
                        output_format=output_format,
                        layers=temp_embed_layers)
        else:
            # create a fake task layer
            fake_output_layer = OutputLayer(n=1, name='Output1')(
                self.branch_output_tensor)
            # build the branch model from the tensor
            branch_model = Model(self.conn,
                                 inputs=self.branch_input_tensor,
                                 outputs=fake_output_layer,
                                 model_table=self.model_name + '_branch')
            branch_model.compile()

            # attach weights
            weight_tbl = WeightsTable(self.conn, self.model_weights.name,
                                      self.model_name)
            branch_model.set_weights(weight_tbl)

            # inherit the weight attr from the full model
            self.conn.retrieve('table.attribute',
                               _messagelevel='error',
                               name=self.model_weights.name,
                               task='CONVERT',
                               attrtable=self.model_weights.name + '_attr')

            self.conn.retrieve('table.attribute',
                               _messagelevel='error',
                               name=branch_model.model_weights.name,
                               task='ADD',
                               attrtable=self.model_weights.name + '_attr')

            self.conn.retrieve('table.dropTable',
                               _messagelevel='error',
                               table=self.model_weights.name + '_attr')

            # add model attrs
            data_specs = [
                DataSpec(type_='IMAGE',
                         layer=self.input_layer_name_prefix + '0',
                         data=['_image_']),
                DataSpec(type_='NUMNOM',
                         layer='Output1',
                         data=['_fake_output_'],
                         nominals=['_fake_output_'])
            ]
            create_extended_attributes(branch_model.conn,
                                       branch_model.model_name,
                                       branch_model.layers, data_specs)

            # deploy it
            temp_embed_layer = self.embedding_layer_name_prefix + '0'
            branch_model.deploy(path=path,
                                output_format=output_format,
                                layers=temp_embed_layer)

            return branch_model
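
A minimal usage sketch, assuming `embed_model` is a trained embedding model instance; the client-side path is a placeholder:

# Hypothetical sketch: deploy a single branch as an astore so that embedding
# features can be extracted for new images one at a time.
branch_model = embed_model.deploy_embedding_model(path='/tmp/deploy',
                                                  output_format='astore',
                                                  model_type='branch')
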
Exemplo n.º 12
0
    def fit_embedding_model(self,
                            optimizer,
                            data=None,
                            path=None,
                            n_samples=512,
                            label_level=-2,
                            resize_width=None,
                            resize_height=None,
                            max_iter=1,
                            valid_table=None,
                            valid_freq=1,
                            gpu=None,
                            seed=0,
                            record_seed=0,
                            save_best_weights=False,
                            n_threads=None,
                            train_from_scratch=None):
        """
        Fits a deep learning model for embedding learning.

        Parameters
        ----------

        optimizer : :class:`Optimizer`
            Specifies the parameters for the optimizer.
        data : :class:`ImageEmbeddingTable`, optional
            This is the input data. It must be an ImageEmbeddingTable object. Either data or path has to be specified.
        path : string, optional
            The path to the image directory on the server.
            Path may be absolute, or relative to the current caslib root.
            When path is specified, the data option will be ignored.
            A new sample of data will be randomly generated after the number of epochs defined in the Optimizer.
            max_iter defines how many times a new random sample is generated.
        n_samples : int, optional
            Number of samples to generate.
            Default: 512
        label_level : int, optional
            Specifies which path level should be used to generate the class labels for each image.
            This class label determines whether a given image pair belongs to the same class.
            For instance, label_level = 1 means the first directory and label_level = -2 means the last directory.
            This internally uses the SAS scan function
            (check https://www.sascrunch.com/scan-function.html for more details).
            Default: -2
        resize_width : int, optional
            Specifies the width that input images are resized to. When resize_width is not given, it is set to
            the specified resize_height.
        resize_height : int, optional
            Specifies the height that input images are resized to. When resize_height is not given, it is set to
            the specified resize_width.
        max_iter : int, optional
            Hard limit on iterations when randomly generating data.
            Default: 1
        valid_table : string or CASTable, optional
            Specifies the table with the validation data. The validation
            table must have the same columns and data types as the training table.
        valid_freq : int, optional
            Specifies the frequency for scoring the validation table.
        gpu : :class:`Gpu`, optional
            When specified, the action uses graphical processing unit hardware.
            The simplest way to use GPU processing is to specify "gpu=1".
            In this case, the default values of other GPU parameters are used.
            Setting gpu=1 enables all available GPU devices for use. Setting
            gpu=0 disables GPU processing.
        seed : double, optional
            Specifies the random number seed for the random number generator
            in SGD. The default value, 0, and negative values indicate to use
            random number streams based on the computer clock. Specify a value
            that is greater than 0 for a reproducible random number sequence.
        record_seed : double, optional
            Specifies the random number seed for the random record selection
            within a worker. The default value 0 disables random record selection.
            Records are read as they are laid out in memory.
            Negative values indicate to use random number streams based on the
            computer clock.
        save_best_weights : bool, optional
            When set to True, it keeps the weights that provide the smallest
            loss error.
        n_threads : int, optional
            Specifies the number of threads to use. If nothing is set then
            all of the cores available in the machine(s) will be used.
        train_from_scratch : bool, optional
            When set to True, it ignores the existing weights and trains the model from scratch.

        Returns
        --------
        :class:`CASResults` or a list of `CASResults` when the path option is specified

        """

        # check options
        if data is None and path is None:
            raise DLPyError(
                'Either the data option or path must be specified to generate the input data'
            )

        if data is not None and path is not None:
            print(
                'Note: the data option will be ignored and the path option will be used to generate the input '
                'data')

        # check the data type
        if path is None:
            if not isinstance(data, ImageEmbeddingTable):
                raise DLPyError(
                    'The data option must contain a valid embedding table')
            if data.embedding_model_type.lower() != self.embedding_model_type.lower():
                raise DLPyError(
                    'The data option must contain a valid embedding table for '
                    + self.embedding_model_type)

        # use the data option to train a model
        if path is None:
            res = self.fit(data,
                           inputs=None,
                           target=None,
                           data_specs=self.data_specs,
                           optimizer=optimizer,
                           valid_table=valid_table,
                           valid_freq=valid_freq,
                           gpu=gpu,
                           seed=seed,
                           record_seed=record_seed,
                           force_equal_padding=True,
                           save_best_weights=save_best_weights,
                           n_threads=n_threads,
                           target_order=None,
                           train_from_scratch=train_from_scratch)
        else:  # use the path option to generate the input data
            import time
            res = []
            time_start = time.time()
            for data_iter in range(0, max_iter):
                # generate a new data table
                time_0 = time.time()
                data = ImageEmbeddingTable.load_files(
                    self.conn,
                    path=path,
                    n_samples=n_samples,
                    label_level=label_level,
                    embedding_model_type=self.embedding_model_type,
                    resize_width=resize_width,
                    resize_height=resize_height)
                print(
                    'Note: data generation took {} (s) at iteration {}'.format(
                        time.time() - time_0, data_iter))

                # train the model using this data
                if data_iter == 0:
                    train_from_scratch_real = train_from_scratch
                else:
                    train_from_scratch_real = False
                res_t = self.fit(data,
                                 inputs=None,
                                 target=None,
                                 data_specs=self.data_specs,
                                 optimizer=optimizer,
                                 valid_table=valid_table,
                                 valid_freq=valid_freq,
                                 gpu=gpu,
                                 seed=seed,
                                 record_seed=record_seed,
                                 force_equal_padding=True,
                                 save_best_weights=save_best_weights,
                                 n_threads=n_threads,
                                 target_order=None,
                                 train_from_scratch=train_from_scratch_real)
                res.append(res_t)
                # drop this data
                data.droptable()
            print('Note: Training with data generation took {} (s)'.format(
                time.time() - time_start))
        return res
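
A minimal usage sketch, assuming `embed_model` is an embedding model built as above and that the DLPy Optimizer and MomentumSolver classes are available; the server-side image path and hyperparameters are illustrative:

# Hypothetical sketch: repeatedly sample image tuples from a server-side
# directory and train the embedding model on each sample.
optimizer = Optimizer(algorithm=MomentumSolver(learning_rate=0.0001),
                      mini_batch_size=8,
                      max_epochs=5)
results = embed_model.fit_embedding_model(optimizer,
                                          path='/data/faces',
                                          n_samples=256,
                                          max_iter=3,
                                          train_from_scratch=True)
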
Exemplo n.º 13
0
def YoloV2(conn,
           anchors,
           model_table='YoloV2',
           n_channels=3,
           width=416,
           height=416,
           scale=1.0 / 255,
           random_mutation=None,
           act='leaky',
           act_detection='AUTO',
           softmax_for_class_prob=True,
           coord_type='YOLO',
           max_label_per_image=30,
           max_boxes=30,
           n_classes=20,
           predictions_per_grid=5,
           do_sqrt=True,
           grid_number=13,
           coord_scale=None,
           object_scale=None,
           prediction_not_a_object_scale=None,
           class_scale=None,
           detection_threshold=None,
           iou_threshold=None,
           random_boxes=False,
           match_anchor_size=None,
           num_to_force_coord=None,
           random_flip=None,
           random_crop=None):
    '''
    Generates a deep learning model with the Yolov2 architecture.

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    anchors : list
        Specifies the anchor box values.
    model_table : string, optional
        Specifies the name of CAS table to store the model.
    n_channels : int, optional
        Specifies the number of the channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 416
    height : int, optional
        Specifies the height of the input layer.
        Default: 416
    scale : double, optional
        Specifies a scaling factor to be applied to each pixel intensity value.
        Default: 1.0 / 255
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    act : string, optional
        Specifies the activation function for the batch normalization layers.
        Default: 'leaky'
    act_detection : string, optional
        Specifies the activation function for the detection layer.
        Valid Values: AUTO, IDENTITY, LOGISTIC, SIGMOID, TANH, RECTIFIER, RELU, SOFTPLUS, ELU, LEAKY, FCMP
        Default: AUTO
    softmax_for_class_prob : bool, optional
        Specifies whether to perform Softmax on class probability per
        predicted object.
        Default: True
    coord_type : string, optional
        Specifies the format of how to represent bounding boxes. For example,
        a bounding box can be represented with the x and y locations of the
        top-left point as well as width and height of the rectangle.
        This format is the 'rect' format. We also support coco and yolo formats.
        Valid Values: 'rect', 'yolo', 'coco'
        Default: 'yolo'
    max_label_per_image : int, optional
        Specifies the maximum number of labels per image in the training.
        Default: 30
    max_boxes : int, optional
        Specifies the maximum number of overall predictions allowed in the
        detection layer.
        Default: 30
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 20
    predictions_per_grid : int, optional
        Specifies the number of predictions made per grid cell.
        Default: 5
    do_sqrt : bool, optional
        Specifies whether to apply the SQRT function to width and height of
        the object for the cost function.
        Default: True
    grid_number : int, optional
        Specifies the number of grid cells along each dimension of the image. For example,
        if the value is 5, then the image will be divided into a 5 x 5 grid.
        Default: 13
    coord_scale : float, optional
        Specifies the weight for the cost function in the detection layer,
        when objects exist in the grid.
    object_scale : float, optional
        Specifies the weight for object detected for the cost function in
        the detection layer.
    prediction_not_a_object_scale : float, optional
        Specifies the weight for the cost function in the detection layer,
        when objects do not exist in the grid.
    class_scale : float, optional
        Specifies the weight for the class of object detected for the cost
        function in the detection layer.
    detection_threshold : float, optional
        Specifies the threshold for object detection.
    iou_threshold : float, optional
        Specifies the IOU threshold for non-maximum suppression in object detection.
    random_boxes : bool, optional
        Specifies whether to randomize boxes when loading the bounding box information.
        Default: False
    match_anchor_size : bool, optional
        Specifies whether to force the predicted boxes to match the anchor box sizes for all predictions
    num_to_force_coord : int, optional
        Specifies the number of leading images in training for which the algorithm forces predicted objects
        in each grid cell to match the anchor box sizes and to be located at the grid center
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is
        used. Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is
        used. Images are cropped to the values that are specified in the width
        and height parameters. Only the images with one or both dimensions
        that are larger than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'

    Returns
    -------
    :class:`Sequential`

    References
    ----------
    https://arxiv.org/pdf/1612.08242.pdf

    '''

    if len(anchors) != 2 * predictions_per_grid:
        raise DLPyError(
            'The size of the anchor list in the detection layer for YOLOv2 should be equal to '
            'twice the number of predictions_per_grid.')

    model = Sequential(conn=conn, model_table=model_table)

    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)

    if input_parameters['width'] != input_parameters['height']:
        print(
            not_supported_feature('Non-square yolo model training',
                                  'height=width'))
        input_parameters['height'] = input_parameters['width']

    model.add(InputLayer(**input_parameters))

    # conv1 224 416
    model.add(Conv2d(32, width=3, act='identity', include_bias=False,
                     stride=1))
    model.add(BN(act=act))
    model.add(Pooling(width=2, height=2, stride=2, pool='max'))
    # conv2 112 208
    model.add(Conv2d(64, width=3, act='identity', include_bias=False,
                     stride=1))
    model.add(BN(act=act))
    model.add(Pooling(width=2, height=2, stride=2, pool='max'))
    # conv3 56 104
    model.add(
        Conv2d(128, width=3, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv4 56 104
    model.add(Conv2d(64, width=1, act='identity', include_bias=False,
                     stride=1))
    model.add(BN(act=act))
    # conv5 56 104
    model.add(
        Conv2d(128, width=3, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    model.add(Pooling(width=2, height=2, stride=2, pool='max'))
    # conv6 28 52
    model.add(
        Conv2d(256, width=3, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv7 28 52
    model.add(
        Conv2d(128, width=1, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv8 28 52
    model.add(
        Conv2d(256, width=3, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    model.add(Pooling(width=2, height=2, stride=2, pool='max'))
    # conv9 14 26
    model.add(
        Conv2d(512, width=3, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv10 14 26
    model.add(
        Conv2d(256, width=1, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv11 14 26
    model.add(
        Conv2d(512, width=3, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv12 14 26
    model.add(
        Conv2d(256, width=1, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv13 14 26
    model.add(
        Conv2d(512, width=3, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    model.add(Pooling(width=2, height=2, stride=2, pool='max'))
    # conv14 7 13
    model.add(
        Conv2d(1024, width=3, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv15 7 13
    model.add(
        Conv2d(512, width=1, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv16 7 13
    model.add(
        Conv2d(1024, width=3, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv17 7 13
    model.add(
        Conv2d(512, width=1, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))
    # conv18 7 13
    model.add(
        Conv2d(1024, width=3, act='identity', include_bias=False, stride=1))
    model.add(BN(act=act))

    model.add(
        Conv2d((n_classes + 5) * predictions_per_grid,
               width=1,
               act='identity',
               include_bias=False,
               stride=1))

    model.add(
        Detection(act=act_detection,
                  detection_model_type='yolov2',
                  anchors=anchors,
                  softmax_for_class_prob=softmax_for_class_prob,
                  coord_type=coord_type,
                  class_number=n_classes,
                  grid_number=grid_number,
                  predictions_per_grid=predictions_per_grid,
                  do_sqrt=do_sqrt,
                  coord_scale=coord_scale,
                  object_scale=object_scale,
                  prediction_not_a_object_scale=prediction_not_a_object_scale,
                  class_scale=class_scale,
                  detection_threshold=detection_threshold,
                  iou_threshold=iou_threshold,
                  random_boxes=random_boxes,
                  max_label_per_image=max_label_per_image,
                  max_boxes=max_boxes,
                  match_anchor_size=match_anchor_size,
                  num_to_force_coord=num_to_force_coord))

    return model
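
A minimal usage sketch, assuming an active CAS session `conn`; the anchor values are the commonly cited YOLOv2 VOC anchors and serve only as an illustration (the list must contain 2 * predictions_per_grid numbers):

# Hypothetical sketch: a 20-class YOLOv2 model with 5 anchor boxes per grid cell.
anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]
yolo_model = YoloV2(conn,
                    anchors=anchors,
                    n_classes=20,
                    predictions_per_grid=5,
                    grid_number=13,
                    coord_type='yolo')
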
Exemplo n.º 14
0
def remove_layer_wrapper(layer):
    '''
    Determines underlying layer type for wrapped layers

    Parameters
    ----------
    layer : Layer object
       Current layer object

    Returns
    -------
    string
        class name of wrapped layer
    list of layer objects
        unwrapped layer object(s)

    '''
    class_name = layer.__class__.__name__.lower()
    # check for layer wrappers
    sublayers = []
    if class_name == 'timedistributed':
        layer_info = layer.get_config()['layer']
        layer_info['config']['name'] = layer.name
        class_name = layer_info['class_name'].lower()
        if class_name == 'dense':
            sublayers.append(keras.layers.Dense(**layer_info['config']))
        else:
            raise DLPyError(class_name + ' is an unsupported time distributed '
                            'layer type - model conversion failed')
    elif class_name == 'bidirectional':
        layer_info = layer.get_config()['layer']
        class_name = layer_info['class_name'].lower()
        # forward direction
        layer_info['config']['name'] = layer.forward_layer.name
        layer_info['config']['go_backwards'] = False
        if class_name == 'lstm':
            sublayers.append(keras.layers.LSTM(**layer_info['config']))
        elif class_name == 'gru':
            sublayers.append(keras.layers.GRU(**layer_info['config']))
        elif class_name == 'simplernn':
            sublayers.append(keras.layers.SimpleRNN(**layer_info['config']))
        elif class_name == 'cudnnlstm':
            sublayers.append(keras.layers.CuDNNLSTM(**layer_info['config']))
        elif class_name == 'cudnngru':
            sublayers.append(keras.layers.CuDNNGRU(**layer_info['config']))
        else:
            raise DLPyError(class_name + ' is an unsupported bidirectional '
                            'layer type - model conversion failed')
        # backward direction
        layer_info['config']['name'] = layer.backward_layer.name
        layer_info['config']['go_backwards'] = True
        if class_name == 'lstm':
            sublayers.append(keras.layers.LSTM(**layer_info['config']))
        elif class_name == 'gru':
            sublayers.append(keras.layers.GRU(**layer_info['config']))
        elif class_name == 'simplernn':
            sublayers.append(keras.layers.SimpleRNN(**layer_info['config']))
        elif class_name == 'cudnnlstm':
            sublayers.append(keras.layers.CuDNNLSTM(**layer_info['config']))
        elif class_name == 'cudnngru':
            sublayers.append(keras.layers.CuDNNGRU(**layer_info['config']))
        else:
            raise DLPyError(class_name + ' is an unsupported bidirectional '
                            'layer type - model conversion failed')
    else:
        sublayers.append(layer)

    # Must return sublayers in reverse order if CUDNN is used.
    # This aligns the Viya layer mapping with the CUDNN layer
    # mapping.
    if layer.__class__.__name__.lower() == 'bidirectional':
        sublayer_info = layer.get_config()['layer']
        if sublayer_info['class_name'].lower() in ['cudnnlstm', 'cudnngru']:
            sublayers.reverse()
            #sublayers = [sublayers[1], sublayers[0]]

    return class_name, sublayers
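
A small illustration of the unwrapping behavior, assuming a standalone Keras installation; the layer names are arbitrary:

# Hypothetical sketch: a Bidirectional(LSTM) wrapper is unwrapped into the
# underlying class name plus one sub-layer per direction.
import keras

wrapped = keras.layers.Bidirectional(keras.layers.LSTM(32), name='bilstm')
class_name, sublayers = remove_layer_wrapper(wrapped)
# class_name == 'lstm'; sublayers holds a forward and a backward LSTM layer
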
Exemplo n.º 15
0
def display_predicted_image_captions(conn,
                                     result_tbl,
                                     npreds=2,
                                     ncol=2,
                                     img_path=None,
                                     figsize=None):
    '''
    Shows caption prediction for random images

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    result_tbl : CASResults object
        Table containing results from scoring the test data
    npreds : int, optional
        Specifies number of caption predictions to show
        Default : 2
    ncol : int, optional
        Specifies number of columns to display images in
        Default : 2
    img_path : string, optional
        If used, specifies the path to the image directory so that images are shown along with captions and objects.
        If None, only shows captions and objects
        Default : None
    figsize : tuple of ints, optional
        Specifies size of images to be displayed
        Default : (16, 16 // ncol * nrow)

    '''
    results = scored_results_to_dict(result_tbl)

    nimages = min(npreds, len(results))

    if img_path is None:
        for i in range(nimages):
            r = random.randint(0, len(results) - 1)

            f_name = list(results.keys())[r]
            actual_caps = (conn.CASTable(
                result_tbl.name,
                where='''_filename_0="{}"'''.format(f_name)).iloc[:, 'caption']
                           ).values
            truth = "\n\t".join(actual_caps)
            objects = (conn.CASTable(result_tbl.name,
                                     where='''_filename_0="{}"'''.format(
                                         f_name)).iloc[:,
                                                       'first_objects']).values
            objects = "\n\t".join(objects[0].split(','))
            rand_row = results[f_name]
            prediction = rand_row[1]
            print(
                "Filename: {}\nObjects: {}\nGround Truth: {}\nPredicted: {}\n".
                format(f_name, objects, truth, prediction))
    else:
        if nimages > ncol:
            nrow = nimages // ncol + 1
        else:
            nrow = 1
            ncol = nimages
        if figsize is None:
            figsize = (16, 16 // ncol * nrow)
        fig = plt.figure(figsize=figsize)

        for i in range(nimages):
            r = random.randint(0, len(results) - 1)
            f_name = list(results.keys())[r]
            rand_row = results[f_name]
            actual_caps = (conn.CASTable(
                result_tbl.name,
                where='''_filename_0="{}"'''.format(f_name)).iloc[:, 'caption']
                           ).values
            truth = "\n".join(actual_caps)
            objects = (conn.CASTable(result_tbl.name,
                                     where='''_filename_0="{}"'''.format(
                                         f_name)).iloc[:,
                                                       'first_objects']).values
            objects = objects[0]
            caption = rand_row[1]
            if '/' in img_path:
                image = '{}/{}'.format(img_path, f_name)
            elif '\\' in img_path:
                image = '{}\\{}'.format(img_path, f_name)
            else:
                raise DLPyError('img_path given is not a valid path')
            image = mpimg.imread(image)
            ax = fig.add_subplot(nrow, ncol, i + 1)
            ax.set_title('Objects: {}\nGround Truth: {}\nPredicted: {}'.format(
                objects, truth, caption))
            plt.imshow(image)
            plt.xticks([]), plt.yticks([])
        plt.show()
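
A minimal usage sketch, assuming `result_tbl` is the scored captioning results table and the images are also available on the client; the path is a placeholder:

# Hypothetical sketch: display two random caption predictions with their images.
display_predicted_image_captions(conn,
                                 result_tbl,
                                 npreds=2,
                                 ncol=2,
                                 img_path='/tmp/coco_images')
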
Exemplo n.º 16
0
def DenseNet121_ONNX(conn,
                     model_file,
                     n_classes=1000,
                     width=224,
                     height=224,
                     offsets=(255 * 0.406, 255 * 0.456, 255 * 0.485),
                     norm_stds=(255 * 0.225, 255 * 0.224, 255 * 0.229),
                     random_flip=None,
                     random_crop=None,
                     random_mutation=None,
                     include_top=False):
    """
    Generates a deep learning model with the DenseNet121_ONNX architecture.
    The model architecture and pre-trained weights are generated from the DenseNet121 ONNX model trained on the ImageNet dataset.
    The model file and the weights file can be downloaded from https://support.sas.com/documentation/prod-p/vdmml/zip/.
    To learn more about the model and preprocessing, please visit:
    https://github.com/onnx/models/tree/master/vision/classification/densenet-121.

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_file : string
        Specifies the absolute server-side path of the model table file.
        The model table file can be downloaded from https://support.sas.com/documentation/prod-p/vdmml/zip/.
    n_classes : int, optional
        Specifies the number of classes.
        Default: 1000
    width : int, optional
        Specifies the width of the input layer.
        Default: 224
    height : int, optional
        Specifies the height of the input layer.
        Default: 224
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
        The channel order is BGR.
        Default: (255*0.406, 255*0.456, 255*0.485)
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
        The channel order is BGR.
        Default: (255*0.225, 255*0.224, 255*0.229)
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is
        used. Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is
        used. Images are cropped to the values that are specified in the width
        and height parameters. Only the images with one or both dimensions
        that are larger than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    include_top : bool, optional
        Specifies whether to include pre-trained weights of the top layers (i.e., the FC layers)
        Default: False

    """
    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)

    # load model and model weights
    model = Model.from_sashdat(conn, path=model_file)
    # check if a user points to a correct model.
    if model.summary.shape[0] != 307:
        raise DLPyError(
            "The model file doesn't point to a valid DenseNet121_ONNX model. "
            "Please check the SASHDAT file.")
    # extract input layer config
    model_table_df = conn.CASTable(**model.model_table).to_frame()
    input_layer_df = model_table_df[model_table_df['_DLLayerID_'] == 0]
    input_layer = extract_input_layer(input_layer_df)
    input_layer_config = input_layer.config
    # update input layer config
    input_layer_config.update(input_parameters)
    # update the layer list
    model.layers[0] = InputLayer(**input_layer_config,
                                 name=model.layers[0].name)

    # warning if model weights doesn't exist
    if not conn.tableexists(model.model_weights.name).exists:
        weights_file_path = os.path.join(os.path.dirname(model_file),
                                         model.model_name + '_weights.sashdat')
        print('WARNING: Model weights are not attached '
              'since the system cannot find a weights file located at {}'.format(
                  weights_file_path))

    if include_top:
        if n_classes != 1000:
            raise DLPyError(
                "If include_top is enabled, n_classes has to be 1000.")
    else:
        # since the output layer is not a fully connected layer, we need to modify the
        # convolution layer right before the output; its number of filters is set to n_classes.
        conv_layer_df = model_table_df[model_table_df['_DLLayerID_'] == 305]
        conv_layer = extract_conv_layer(conv_layer_df)
        conv_layer_config = conv_layer.config
        # update conv layer config
        conv_layer_config.update({'n_filters': n_classes})
        # update the layer list
        model.layers[-2] = Conv2d(**conv_layer_config,
                                  name=model.layers[-2].name,
                                  src_layers=model.layers[-3])

        # overwrite n_classes in output layer
        out_layer_df = model_table_df[model_table_df['_DLLayerID_'] == 306]
        out_layer = extract_output_layer(out_layer_df)
        out_layer_config = out_layer.config
        # update output layer config
        out_layer_config.update({'n': n_classes})
        # update the layer list
        model.layers[-1] = OutputLayer(**out_layer_config,
                                       name=model.layers[-1].name,
                                       src_layers=model.layers[-2])

        # remove top weights
        model.model_weights.append_where('_LayerID_<305')
        model._retrieve_('table.partition',
                         table=model.model_weights,
                         casout=dict(replace=True,
                                     name=model.model_weights.name))
        model.set_weights(model.model_weights.name)
    # recompile the whole network according to the new layer list
    model.compile()
    return model
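
# Hedged usage sketch (not part of the original example): assuming the loader above
# is exposed as DenseNet121_ONNX and that the SASHDAT file has already been copied to
# the CAS server, the pre-trained network can be re-headed for a 10-class problem.
# The host name and the server-side path are placeholders.
import swat

conn = swat.CAS('cas-host.example.com', 5570)
model = DenseNet121_ONNX(conn,
                         model_file='/models/DenseNet121_ONNX.sashdat',
                         n_classes=10,        # replaces the 1000-class head
                         include_top=False)   # drop the pre-trained FC weights
model.print_summary()
conn.terminate()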
Exemplo n.º 17
0
def bert_summary(conn, table_name, full_table=True, subset_fraction=0.1):
    '''
    Display summary statistics for tokenized data from a given CAS table

    Parameters
    ----------
    conn : CAS Connection
        Specifies the CAS connection.
    table_name : string
        Specifies the name of the CAS table.
    full_table : boolean, optional
        Specifies whether statistics are calculated over full table or subset.
        Default: True
    subset_fraction : float, optional
        Specifies the fraction of the table to use to calculate summary
        statistics.  May be necessary for large tables.
        Default: 0.1
        
    '''

    r = conn.retrieve('table.recordcount',
                      _messagelevel='error',
                      table=table_name)
    num_obs = r['RecordCount']['N'].values[0]
    print("NOTE: there are " + str(num_obs) +
          " observations in the Viya table.")

    if not full_table:
        num_obs_calc = int(round(num_obs * subset_fraction))
        print("NOTE: calculating summary statistics based on the first " +
              str(round(subset_fraction * 100.0)) + "% of the table.\n")
    else:
        num_obs_calc = num_obs

    chunk_size = min([num_obs_calc, 10000])
    min_tokens = sys.maxsize
    max_tokens = 0
    sum_tokens = 0
    sum_sq_tokens = 0
    token_var = BertCommon['variable_names']['token_var']
    for ii in range(0, num_obs_calc, chunk_size):
        num_rows = min([chunk_size, num_obs_calc - ii])
        tmp = conn.retrieve('table.fetch',
                            _messagelevel='error',
                            table=table_name,
                            maxrows=num_rows,
                            from_=ii,
                            to=ii + num_rows)

        col_names = list(tmp['Fetch'])
        if token_var not in col_names:
            raise DLPyError("Missing variable " + token_var + " in table " +
                            table_name + ".")

        tmp_list = tmp['Fetch'][token_var].to_list()
        obs_num_tokens = [
            len(tmp_list[jj].split(' ')) for jj in range(num_rows)
        ]
        min_tokens = min([min_tokens, min(obs_num_tokens)])
        max_tokens = max([max_tokens, max(obs_num_tokens)])
        sum_tokens += sum(obs_num_tokens)
        sum_sq_tokens += sum(
            [x1 * x2 for x1, x2 in zip(obs_num_tokens, obs_num_tokens)])

    mean_num_tokens = sum_tokens / num_obs_calc
    std_num_tokens = np.sqrt(sum_sq_tokens / num_obs_calc -
                             mean_num_tokens * mean_num_tokens)

    print("NOTE: minimum number of tokens in an observation = " +
          str(min_tokens))
    print("NOTE: maximum number of tokens in an observation = " +
          str(max_tokens))
    print("NOTE: average number of tokens in an observation = " +
          str(mean_num_tokens))
    print(
        "NOTE: standard deviation of the number of tokens in an observation = "
        + str(std_num_tokens) + '\n')
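
# Hedged usage sketch (not part of the original example): assuming a CAS table named
# 'bert_train_tokens' already holds BERT-tokenized text with the token variable that
# BertCommon expects, the summary can be computed on a 5% sample of a large table.
# The host name and the table name are placeholders.
import swat

conn = swat.CAS('cas-host.example.com', 5570)
bert_summary(conn, 'bert_train_tokens', full_table=False, subset_fraction=0.05)
conn.terminate()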
Exemplo n.º 18
0
    def load_audio_files(cls, conn, path, casout=None, caslib=None):
        '''
        Load audio files from path

        Parameters
        ----------
        conn : CAS
            CAS connection object
        path : string
            Path to audio files
        casout : dict or string or CASTable, optional
            The output CAS table specification
        caslib : string, optional
            The caslib to load audio files from

        Returns
        -------
        :class:`AudioTable`
            If audio files are found
        None
            If no audio files are found

        '''
        conn.loadactionset('audio', _messagelevel='error')

        if casout is None:
            casout = dict(name=random_name('AudioTable', 6))
        elif isinstance(casout, CASTable):
            casout = casout.to_outtable_params()

        if caslib is None:
            # get the os of the server
            server_type = get_cas_host_type(conn).lower()
            if server_type.startswith("lin") or server_type.startswith("osx"):
                path_split = path.rsplit("/", 1)
            else:
                path_split = path.rsplit("\\", 1)

            # try accessing the file
            if len(path_split) == 2:
                caslib = find_caslib(conn, path_split[0])
                if caslib is not None:
                    rt2 = conn.retrieve('audio.loadaudio',
                                        _messagelevel='error',
                                        casout=casout,
                                        caslib=caslib,
                                        path=path_split[1])
                    if rt2.severity > 1:
                        for msg in rt2.messages:
                            print(msg)
                        raise DLPyError(
                            'cannot load audio files, something is wrong!')
                    cls.running_caslib = path_split[0]
                    return AudioTable(casout['name'])
                else:
                    caslib = random_name('Caslib', 6)
                    rt2 = conn.retrieve('addcaslib',
                                        _messagelevel='error',
                                        name=caslib,
                                        path=path_split[0],
                                        activeonadd=False,
                                        subdirectories=True,
                                        datasource={'srctype': 'path'})
                    if rt2.severity < 2:
                        rt3 = conn.retrieve('audio.loadaudio',
                                            _messagelevel='error',
                                            casout=casout,
                                            caslib=caslib,
                                            path=path_split[1])
                        if rt3.severity > 1:
                            for msg in rt3.messages:
                                print(msg)
                            raise DLPyError(
                                'cannot load audio files, something is wrong!')
                        else:
                            cls.running_caslib = path_split[0]
                            return AudioTable(casout['name'])
            return None
        else:
            rt4 = conn.retrieve('audio.loadaudio',
                                _messagelevel='error',
                                casout=casout,
                                caslib=caslib,
                                path=path)
            if rt4.severity > 1:
                for msg in rt4.messages:
                    print(msg)
                raise DLPyError('cannot load audio files, something is wrong!')
            cls.running_caslib = find_path_of_caslib(conn, caslib)
            return AudioTable(casout['name'])
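
# Hedged usage sketch (not part of the original example): assuming this classmethod is
# exposed on dlpy.audio.AudioTable and that the CAS server can read the given directory.
# The host name, path, and output table name are placeholders.
import swat
from dlpy.audio import AudioTable

conn = swat.CAS('cas-host.example.com', 5570)
audio_tbl = AudioTable.load_audio_files(conn,
                                        path='/data/audio/commands',
                                        casout=dict(name='raw_audio', replace=True))
if audio_tbl is None:
    print('no audio files were found under the given path')
else:
    print('audio files loaded into CAS table raw_audio')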
Exemplo n.º 19
0
def write_block_information(pymodel, layer_info, keywords, f_out):
    '''
    Write information for a block of layers to an HDF5 file

    Parameters
    ----------
    pymodel : PyTorch model
        Specifies the Pytorch model object.
    layer_info : list of dictionaries
        Specifies a list of dictionaries - each dictionary entry defines a layer in the model.
    keywords : list of strings
        Specifies the keywords to search for in the PyTorch model dictionary.
    f_out: file handle
        Specifies the HDF5 file handle.

    Returns
    -------
    PyTorch parameter name, PyTorch tensor

    '''

    if isinstance(keywords, list):
        key_list = keywords
    else:
        key_list = [keywords]

    for ii in range(len(layer_info)):
        for key in layer_info[ii].keys():
            lname = layer_info[ii][key]['name']
            ltype = layer_info[ii][key]['type']
            ldim = layer_info[ii][key]['dim']

            if ltype != BertCommon['layer_types']['noparms']:
                matval, vecval = extract_pytorch_parms(pymodel, lname, ltype,
                                                       ldim, key_list)

                # there should be only one match for a given layer
                if (len(matval) > 1) or ((vecval is not None) and
                                         (len(vecval) > 1)):
                    raise DLPyError(
                        'There were multiple PyTorch layers that matched layer '
                        + lname)

                g_out = f_out.create_group(lname)
                new_weight_names = []

                # save weights in format amenable to SAS
                dset_name = generate_hdf5_dataset_name(
                    lname, BertCommon['weight_index'])
                new_weight_names.append(dset_name)
                g_out.create_dataset(dset_name, data=matval[0])

                # save bias in format amenable to SAS
                if vecval is not None:
                    dset_name = generate_hdf5_dataset_name(
                        lname, BertCommon['bias_index'])
                    new_weight_names.append(dset_name)
                    g_out.create_dataset(dset_name, data=vecval[0])

                # update weight names
                g_out.attrs['weight_names'] = new_weight_names
Exemplo n.º 20
0
    def create_audio_table_speechrecognition(
            cls,
            conn,
            data_path,
            metadata_path,
            features_parameters=dict(frame_shift=10,
                                     frame_length=25,
                                     n_bins=40,
                                     n_ceps=40,
                                     feature_scaling_method='STANDARDIZATION',
                                     n_output_frames=500),
            casout=None):
        '''
        Creates an Audio table and takes care of all the necessary steps

        Parameters
        ----------
        conn : CAS
            A connection object to the current session.
        data_path : string
            Path to the file that contains the list of audio files (this is
            expected to be on the server side).
        metadata_path : string
            Location to the metadata file (this is expected to be on the client side).
        features_parameters : dict, optional
            Parameters to be used while extracting audio features
        casout : string, optional
            Resulting output CAS table

        Returns
        -------
        :class:`AudioTable`
            A table containing audio features of audio files as well as their labels. The resulting table can be
            directly used in the deep learning models.

        Raises
        ------
        DLPyError
            If anything goes wrong at any point in the process of creating this AudioTable, it raises an
            error and prints the appropriate message.

        '''
        au = cls.load_audio_files(conn, data_path)
        if au is None:
            raise DLPyError('cannot load audio files')

        fp = features_parameters

        features = cls.extract_audio_features(
            conn,
            au,
            frame_shift=fp['frame_shift'],
            frame_length=fp['frame_length'],
            n_bins=fp['n_bins'],
            n_ceps=fp['n_ceps'],
            feature_scaling_method=fp['feature_scaling_method'],
            n_output_frames=fp['n_output_frames'],
            copyvars=['_path_'])

        if features is None:
            raise DLPyError('cannot extract audio features')

        if cls.running_caslib is None:
            raise DLPyError(
                'there is something wrong, cannot identify the current caslib')

        me = cls.load_audio_metadata(conn,
                                     metadata_path,
                                     audio_path=cls.running_caslib)
        if me is None:
            raise DLPyError('cannot load the audio metadata')

        conn.loadactionset('deeplearn', _messagelevel='error')

        if casout is None:
            casout = dict(name=random_name('AudioTable', 6))
        elif isinstance(casout, CASTable):
            casout = casout.to_outtable_params()

        if 'name' not in casout:
            casout['name'] = random_name('AudioTable', 6)

        rt = conn.retrieve('dlJoin',
                           _messagelevel='error',
                           casout=casout,
                           annotation=me,
                           table=features,
                           id='_path_')

        if rt.severity > 1:
            for msg in rt.messages:
                print(msg)
            raise DLPyError('cannot create the final audio table!')

        return AudioTable(casout['name'])
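
# Hedged usage sketch (not part of the original example): assuming this classmethod is
# exposed on dlpy.audio.AudioTable, that the listing file is readable by the CAS server,
# and that the metadata file lives on the client. All paths and names are placeholders.
import swat
from dlpy.audio import AudioTable

conn = swat.CAS('cas-host.example.com', 5570)
speech_tbl = AudioTable.create_audio_table_speechrecognition(
    conn,
    data_path='/data/audio/train_files.listing',
    metadata_path='/local/metadata/train_transcripts.txt',
    casout=dict(name='speech_train', replace=True))
print('feature table ready for model training:', speech_tbl)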
Exemplo n.º 21
0
def keras_to_sas(model,
                 rnn_support,
                 model_name=None,
                 offsets=None,
                 std=None,
                 scale=1.0,
                 max_num_frames=-1,
                 verbose=False):
    output_code = ''
    layer_activation = {}
    src_layer = {}
    layer_dropout = {}
    if model_name is None:
        model_name = model.name
    model_type = 'CNN'
    n_lambda_layer = 0
    for layer in model.layers:
        class_name, sublayers = remove_layer_wrapper(layer)
        for tlayer in sublayers:
            if (class_name
                    in computation_layer_classes) or (class_name
                                                      == 'zeropadding2d'):
                comp_layer_name = find_previous_computation_layer(
                    model, layer.name, computation_layer_classes)
                source_str = make_source_str(comp_layer_name)
                src_layer.update({tlayer.name: source_str})
            elif class_name == 'activation':
                tmp_name = find_previous_computation_layer(
                    model, layer.name, computation_layer_classes)
                tmp_act = extract_activation(layer)
                layer_activation.update({tmp_name[0]: tmp_act})
            elif class_name == 'dropout':
                tmp = find_next_computation_layer(model, layer,
                                                  dropout_layer_classes)
                dconfig = layer.get_config()
                layer_dropout.update({tmp: dconfig['rate']})
            # check for RNN model
            if class_name in [
                    'simplernn', 'lstm', 'gru', 'cudnnlstm', 'cudnngru'
            ]:
                if rnn_support:
                    model_type = 'RNN'
                else:
                    raise DLPyError(
                        'RNN model detected: your Viya deployment does not support '
                        'importing an RNN model.')
            # check for Lambda layers
            if layer.__class__.__name__.lower() == 'lambda':
                n_lambda_layer = n_lambda_layer + 1

    # if first layer is not an input layer, generate the correct
    # input layer code for a SAS deep learning model
    layer = model.layers[0]
    if layer.__class__.__name__.lower() != 'inputlayer':
        sas_code = keras_input_layer(layer, model_name, False, offsets, std,
                                     scale, model_type, max_num_frames)
        # write SAS code for input layer
        if sas_code:
            output_code = output_code + sas_code + '\n\n'
        else:
            raise KerasParseError('Unable to generate an input layer')

    # only one Lambda layer supported, and it must be the last model layer
    # assumption: CTC loss must be specified for an RNN model using a
    #             Lambda layer
    ctc_loss = False
    if n_lambda_layer > 0:
        layer = model.layers[-1]
        if (n_lambda_layer == 1) and (layer.__class__.__name__.lower()
                                      == 'lambda') and (model_type == 'RNN'):
            ctc_loss = True
            if verbose:
                print(
                    'WARNING - detected a Lambda layer terminating the Keras model.  This is assumed to be '
                    'the CTC loss function definition.  If that is incorrect, please revise your Keras model.'
                )
        else:
            raise KerasParseError(
                'Detected one or more Lambda layers. Only 1 Lambda '
                'layer is supported for RNN models, and it must be '
                'the last layer.')

    # extract layers and apply activation functions as needed
    zero_pad = None
    for layer in model.layers:
        class_name, sublayers = remove_layer_wrapper(layer)
        for tlayer in sublayers:
            sas_code = None

            # determine activation function
            if class_name in ['conv2d', 'batchnormalization', 'add', 'dense']:
                if layer.name in layer_activation.keys():
                    act_func = layer_activation[layer.name]
                else:
                    act_func = None
            else:
                act_func = None

            # average/max pooling/globalaveragepooling
            if class_name in [
                    'averagepooling2d', 'maxpooling2d',
                    'globalaveragepooling2d'
            ]:
                sas_code = keras_pooling_layer(tlayer, model_name, class_name,
                                               src_layer, layer_dropout,
                                               zero_pad)
                zero_pad = None
            # 2D convolution
            elif class_name == 'conv2d':
                sas_code = keras_convolution_layer(tlayer, model_name,
                                                   act_func, src_layer,
                                                   layer_dropout, zero_pad)
                zero_pad = None
            # batch normalization
            elif class_name == 'batchnormalization':
                sas_code = keras_batchnormalization_layer(
                    tlayer, model_name, act_func, src_layer)
            # input layer
            elif class_name == 'inputlayer':
                sas_code = keras_input_layer(tlayer, model_name, True, offsets,
                                             std, scale, model_type,
                                             max_num_frames)
            # add
            elif class_name == 'add':
                sas_code = keras_residual_layer(tlayer, model_name, act_func,
                                                src_layer)
            elif class_name in [
                    'activation', 'flatten', 'dropout', 'zeropadding2d',
                    'lambda'
            ]:
                pass
            # fully connected
            elif class_name == 'dense':
                sas_code = keras_full_connect_layer(tlayer, model_name,
                                                    act_func, src_layer,
                                                    layer_dropout, ctc_loss)
            # concatenate
            elif class_name == 'concatenate':
                sas_code = keras_concatenate_layer(tlayer, model_name,
                                                   act_func, src_layer)
            # recurrent
            elif class_name in [
                    'simplernn', 'lstm', 'gru', 'cudnnlstm', 'cudnngru'
            ]:
                sas_code = keras_recurrent_layer(tlayer, model_name, act_func,
                                                 src_layer)
            else:
                raise KerasParseError(class_name + ' is an unsupported layer '
                                      'type - model conversion failed')

            # write SAS code associated with Keras layer
            if sas_code:
                output_code = output_code + sas_code + '\n\n'
            # zero-padding
            elif (class_name == 'zeropadding2d'):
                zero_pad = keras_zeropad2d_layer(tlayer, src_layer)
            elif (class_name
                  not in ['activation', 'flatten', 'dropout', 'lambda']):
                if verbose:
                    print('WARNING: unable to generate SAS definition '
                          'for layer ' + tlayer.name)
    return output_code
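
# Hedged usage sketch (not part of the original example): converting a tiny Keras CNN
# into SAS deep learning action code. rnn_support would normally reflect what the target
# Viya release supports; it is set to False here because the model has no recurrent layers.
from tensorflow import keras

small_cnn = keras.Sequential([
    keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(10, activation='softmax'),
], name='small_cnn')

sas_code = keras_to_sas(small_cnn, rnn_support=False, offsets=[0, 0, 0], scale=1.0)
print(sas_code)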
Exemplo n.º 22
0
def create_embeddings_from_object_detection(conn,
                                            image_table,
                                            detection_model,
                                            word_embeddings_file,
                                            n_threads=None,
                                            gpu=None,
                                            max_objects=5,
                                            word_delimiter='\t'):
    '''
    Builds CASTable with objects detected in images as numeric data

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    image_table : ImageTable
        Specifies the ImageTable containing the images to be used for training.
    detection_model : CASTable or string
        Specifies CASTable containing model parameters for the object detection model
    word_embeddings_file : string
        Specifies the full path to the file containing pre-trained word vectors to be used for text generation.
        This file should be accessible from the client.
    n_threads : int, optional
        Specifies the number of threads to use when scoring the table. All available
        cores are used when nothing is set.
        Default : None
    gpu : Gpu, optional
        When specified, determines which GPU to use when scoring the table. GPU=1 uses
        all available GPU devices and default parameters.
        Default : None
    max_objects : int, optional
        Specifies the maximum number of detected objects to keep per image (capped at five).
        Default : 5
    word_delimiter : string, optional
        Specifies delimiter used in word_embeddings file
        Default : '\t'
    Returns
    -------
    :class:`CASTable`

    '''
    if not os.path.exists(word_embeddings_file):
        raise DLPyError('word_embeddings_file does not exist')

    if not isinstance(image_table, ImageTable):
        raise DLPyError('image_table must be an ImageTable object')

    conn.loadactionset('deepLearn')
    conn.loadactionset('textparse')

    width = detection_model.summary['Output Size'][0][1]
    height = detection_model.summary['Output Size'][0][0]
    image_table.resize(width=width, height=height)
    scoring_error = False
    try:
        scored = detection_model.predict(data=image_table,
                                         n_threads=n_threads,
                                         gpu=gpu)
    except Exception:
        scoring_error = True
    if scoring_error or scored is None:
        raise DLPyError('Something went wrong while scoring the data.')

    object_table = detection_model.valid_res_tbl
    # combine first n objects into single column
    first_objects = object_table.copy()

    first_objects['first_objects'] = first_objects['_Object0_'] + ","
    if max_objects > 5:
        max_objects = 5
    for i in range(1, max_objects):
        objects = first_objects['_Object{}_'.format(i)] + ","
        first_objects['first_objects'] = first_objects['first_objects'].add(
            objects)

    objects_numeric = numeric_parse_text(conn,
                                         first_objects,
                                         word_embeddings_file,
                                         word_delimiter=word_delimiter)

    # merge objects table and numeric table
    df1 = objects_numeric.to_frame()
    df2 = first_objects.to_frame()
    objects = pd.merge(df1, df2, left_on='_id_', right_on='_id_', how='left')

    objects = conn.upload_frame(objects,
                                casout=dict(name='objects', replace=True))
    # remove unnecessary columns
    useful_vars = list(objects_numeric.columns)
    useful_vars.append('_filename_0')
    useful_vars.append('first_objects')
    bad_columns = set(list(objects.columns)) - set(useful_vars)
    final_objects = objects.drop(bad_columns, axis=1)

    return final_objects
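
# Hedged usage sketch (not part of the original example): assuming a pre-trained object
# detection model is available as a SASHDAT file, the images are already on the server,
# and a GloVe-style embeddings file sits on the client. Every path, table name, and host
# below is a placeholder.
import swat
from dlpy import Model
from dlpy.images import ImageTable

conn = swat.CAS('cas-host.example.com', 5570)
img_tbl = ImageTable.load_files(conn, path='/data/images/captioning')
detector = Model.from_sashdat(conn, path='/models/Tiny-Yolov2.sashdat')
obj_features = create_embeddings_from_object_detection(
    conn,
    image_table=img_tbl,
    detection_model=detector,
    word_embeddings_file='/local/embeddings/glove.6B.100d.txt',
    max_objects=3)
print(list(obj_features.columns))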
Exemplo n.º 23
0
def Faster_RCNN(conn,
                model_table='Faster_RCNN',
                n_channels=3,
                width=1000,
                height=496,
                scale=1,
                norm_stds=None,
                offsets=(102.9801, 115.9465, 122.7717),
                random_mutation=None,
                n_classes=20,
                anchor_num_to_sample=256,
                anchor_ratio=[0.5, 1, 2],
                anchor_scale=[8, 16, 32],
                base_anchor_size=16,
                coord_type='coco',
                max_label_per_image=200,
                proposed_roi_num_train=2000,
                proposed_roi_num_score=300,
                roi_train_sample_num=128,
                roi_pooling_height=7,
                roi_pooling_width=7,
                nms_iou_threshold=0.3,
                detection_threshold=0.5,
                max_object_num=50,
                number_of_neurons_in_fc=4096,
                backbone='vgg16',
                random_flip=None,
                random_crop=None):
    '''
    Generates a deep learning model with the faster RCNN architecture.

    Parameters
    ----------
    conn : CAS
        Specifies the connection of the CAS connection.
    model_table : string, optional
        Specifies the name of CAS table to store the model.
    n_channels : int, optional
        Specifies the number of the channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 1000
    height : int, optional
        Specifies the height of the input layer.
        Default: 496
    scale : double, optional
        Specifies a scaling factor to be applied to each pixel intensity values.
        Default: 1
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the
        input layer.
        Valid Values: 'none', 'random'
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 20
    anchor_num_to_sample : int, optional
        Specifies the number of anchors to sample for training the region proposal network
        Default: 256
    anchor_ratio : iter-of-float, optional
        Specifies the anchor height and width ratios (h/w) used.
        Default: [0.5, 1, 2]
    anchor_scale : iter-of-float, optional
        Specifies the anchor scales used based on base_anchor_size.
        Default: [8, 16, 32]
    base_anchor_size : int, optional
        Specifies the basic anchor size in width and height (in pixels) in the original input image dimension
        Default: 16
    coord_type : string, optional
        Specifies the coordinate format type used in the input label and detection result.
        Valid Values: 'rect', 'coco', 'yolo'
        Default: 'coco'
    proposed_roi_num_score: int, optional
        Specifies the number of ROI (Region of Interest) to propose in the scoring phase
        Default: 300
    proposed_roi_num_train: int, optional
        Specifies the number of ROI (Region of Interest) to propose used for RPN training, and also the pool to
        sample from for FastRCNN Training in the training phase
        Default: 2000
    roi_train_sample_num: int, optional
        Specifies the number of ROIs(Regions of Interests) to sample after NMS(Non-maximum Suppression)
        is performed in the training phase.
        Default: 128
    roi_pooling_height : int, optional
        Specifies the output height of the region pooling layer.
        Default: 7
    roi_pooling_width : int, optional
        Specifies the output width of the region pooling layer.
        Default: 7
    max_label_per_image : int, optional
        Specifies the maximum number of labels per image in the training.
        Default: 200
    nms_iou_threshold: float, optional
        Specifies the IOU threshold of maximum suppression in object detection
        Default: 0.3
    detection_threshold : float, optional
        Specifies the threshold for object detection.
        Default: 0.5
    max_object_num: int, optional
        Specifies the maximum number of object to detect
        Default: 50
    number_of_neurons_in_fc : int or list of int, optional
        Specifies the number of neurons in the last two fully connected layers. If a single int is given,
        both layers have the same number of neurons. If a list is given, the layers get different
        numbers of neurons.
        Default: 4096
    backbone : string, optional
        Specifies the architecture to be used as the feature extractor.
        Valid Values: 'vgg16', 'resnet50', 'resnet18', 'resnet34', 'mobilenetv1', 'mobilenetv2'
        Default: 'vgg16'
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is
        used. Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is
        used. Images are cropped to the values that are specified in the width
        and height parameters. Only the images with one or both dimensions
        that are larger than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'

    Returns
    -------
    :class:`Sequential`

    References
    ----------
    https://arxiv.org/abs/1506.01497

    '''
    # calculate number of anchors that equal to product of length of anchor_ratio and length of anchor_scale
    num_anchors = len(anchor_ratio) * len(anchor_scale)
    parameters = locals()
    # get parameters of input, rpn, fast_rcnn layer
    input_parameters = get_layer_options(input_layer_options, parameters)
    rpn_parameters = get_layer_options(rpn_layer_options, parameters)
    fast_rcnn_parameters = get_layer_options(fast_rcnn_options, parameters)
    inp = Input(**input_parameters, name='data')

    if backbone.lower() == 'vgg16':
        # backbone is VGG16 model
        conv1_1 = Conv2d(n_filters=64,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv1_1')(inp)
        conv1_2 = Conv2d(n_filters=64,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv1_2')(conv1_1)
        pool1 = Pooling(width=2, height=2, stride=2, pool='max',
                        name='pool1')(conv1_2)

        conv2_1 = Conv2d(n_filters=128,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv2_1')(pool1)
        conv2_2 = Conv2d(n_filters=128,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv2_2')(conv2_1)
        pool2 = Pooling(width=2, height=2, stride=2, pool='max')(conv2_2)

        conv3_1 = Conv2d(n_filters=256,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv3_1')(pool2)
        conv3_2 = Conv2d(n_filters=256,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv3_2')(conv3_1)
        conv3_3 = Conv2d(n_filters=256,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv3_3')(conv3_2)
        pool3 = Pooling(width=2, height=2, stride=2, pool='max')(conv3_3)

        conv4_1 = Conv2d(n_filters=512,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv4_1')(pool3)
        conv4_2 = Conv2d(n_filters=512,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv4_2')(conv4_1)
        conv4_3 = Conv2d(n_filters=512,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv4_3')(conv4_2)
        pool4 = Pooling(width=2, height=2, stride=2, pool='max')(conv4_3)

        conv5_1 = Conv2d(n_filters=512,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv5_1')(pool4)
        conv5_2 = Conv2d(n_filters=512,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv5_2')(conv5_1)
        # feature of Conv5_3 is used to generate region proposals
        last_layer_in_backbone = Conv2d(n_filters=512,
                                        width=3,
                                        height=3,
                                        stride=1,
                                        name='conv5_3')(conv5_2)
        # two convolution layers built on top of conv5_3; the second reduces the feature map depth to 6 * num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512,
                          name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity',
                           width=1,
                           n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)

        # propose anchors, NMS, select anchors to train RPN, produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)

        # given ROIs, crop on conv5_3 and resize the feature to the same size
        roipool1 = ROIPooling(
            output_height=roi_pooling_height,
            output_width=roi_pooling_width,
            spatial_scale=last_layer_in_backbone.shape[0] / width,
            name='roi_pooling')([last_layer_in_backbone, rp1])

    elif backbone.lower() == 'resnet50':

        from .resnet import ResNet50_SAS
        backbone = ResNet50_SAS(conn, width=width, height=height)
        backbone.layers[-2].src_layers
        backbone_with_last = backbone.to_functional_model(
            stop_layers=backbone.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolution layers built on top of the backbone feature map; the second reduces its depth to 6 * num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512,
                          name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity',
                           width=1,
                           n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, NMS, select anchors to train RPN, produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        roipool1 = ROIPooling(
            output_height=roi_pooling_height,
            output_width=roi_pooling_width,
            spatial_scale=last_layer_in_backbone[0].shape.output_size[0] /
            height,
            name='roi_pooling')([last_layer_in_backbone[0], rp1])

    elif backbone.lower() == 'resnet34':
        from .resnet import ResNet34_SAS
        backbone = ResNet34_SAS(conn, width=width, height=height)
        backbone.layers[-2].src_layers
        backbone_with_last = backbone.to_functional_model(
            stop_layers=backbone.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolution layers built on top of the backbone feature map; the second reduces its depth to 6 * num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512,
                          name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity',
                           width=1,
                           n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, NMS, select anchors to train RPN, produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        roipool1 = ROIPooling(
            output_height=roi_pooling_height,
            output_width=roi_pooling_width,
            spatial_scale=last_layer_in_backbone[0].shape.output_size[0] /
            height,
            name='roi_pooling')([last_layer_in_backbone[0], rp1])

    elif backbone.lower() == 'resnet18':
        from .resnet import ResNet18_SAS
        backbone = ResNet18_SAS(conn, width=width, height=height)
        backbone.layers[-2].src_layers
        backbone_with_last = backbone.to_functional_model(
            stop_layers=backbone.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolution layers built on top of the backbone feature map; the second reduces its depth to 6 * num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512,
                          name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity',
                           width=1,
                           n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, NMS, select anchors to train RPN, produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        roipool1 = ROIPooling(
            output_height=roi_pooling_height,
            output_width=roi_pooling_width,
            spatial_scale=last_layer_in_backbone[0].shape.output_size[0] /
            height,
            name='roi_pooling')([last_layer_in_backbone[0], rp1])

    elif backbone.lower() == 'mobilenetv1':
        from .mobilenet import MobileNetV1
        backbone = MobileNetV1(conn, width=width, height=height)
        backbone.layers[-2].src_layers
        backbone_with_last = backbone.to_functional_model(
            stop_layers=backbone.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolution layers built on top of the backbone feature map; the second reduces its depth to 6 * num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512,
                          name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity',
                           width=1,
                           n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, NMS, select anchors to train RPN, produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        roipool1 = ROIPooling(
            output_height=roi_pooling_height,
            output_width=roi_pooling_width,
            spatial_scale=last_layer_in_backbone[0].shape.output_size[0] /
            height,
            name='roi_pooling')([last_layer_in_backbone[0], rp1])

    elif backbone.lower() == 'mobilenetv2':
        from .mobilenet import MobileNetV2
        backbone = MobileNetV2(conn, width=width, height=height)
        backbone.layers[-2].src_layers
        backbone_with_last = backbone.to_functional_model(
            stop_layers=backbone.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolution layers built on top of the backbone feature map; the second reduces its depth to 6 * num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512,
                          name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity',
                           width=1,
                           n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, NMS, select anchors to train RPN, produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        roipool1 = ROIPooling(
            output_height=roi_pooling_height,
            output_width=roi_pooling_width,
            spatial_scale=last_layer_in_backbone[0].shape.output_size[0] /
            height,
            name='roi_pooling')([last_layer_in_backbone[0], rp1])
    else:
        raise DLPyError('We are not supporting this backbone yet.')

    # fully connect layer to extract the feature of ROIs
    if number_of_neurons_in_fc is None:
        fc6 = Dense(n=4096, act='relu', name='fc6')(roipool1)
        fc7 = Dense(n=4096, act='relu', name='fc7')(fc6)
    else:
        if isinstance(number_of_neurons_in_fc, list):
            if len(number_of_neurons_in_fc) > 1:
                fc6 = Dense(n=number_of_neurons_in_fc[0],
                            act='relu',
                            name='fc6')(roipool1)
                fc7 = Dense(n=number_of_neurons_in_fc[1],
                            act='relu',
                            name='fc7')(fc6)
            else:
                fc6 = Dense(n=number_of_neurons_in_fc[0],
                            act='relu',
                            name='fc6')(roipool1)
                fc7 = Dense(n=number_of_neurons_in_fc[0],
                            act='relu',
                            name='fc7')(fc6)
        else:
            fc6 = Dense(n=number_of_neurons_in_fc, act='relu',
                        name='fc6')(roipool1)
            fc7 = Dense(n=number_of_neurons_in_fc, act='relu', name='fc7')(fc6)
    # classification tensor
    cls1 = Dense(n=n_classes + 1, act='identity', name='cls_score')(fc7)
    # regression tensor (second-stage bounding box regression)
    reg1 = Dense(n=(n_classes + 1) * 4, act='identity', name='bbox_pred')(fc7)
    # the task layer receives cls1, reg1, and rp1 (ground truth) and trains the second stage
    fr1 = FastRCNN(**fast_rcnn_parameters,
                   class_number=n_classes,
                   name='fastrcnn')([cls1, reg1, rp1])
    faster_rcnn = Model(conn, inp, fr1, model_table=model_table)
    faster_rcnn.compile()
    return faster_rcnn
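
# Hedged usage sketch (not part of the original example): building a Faster R-CNN graph
# with the default VGG16 backbone for a three-class detection problem. The host name is a
# placeholder and no training or weight loading is performed here.
import swat

conn = swat.CAS('cas-host.example.com', 5570)
detector = Faster_RCNN(conn,
                       model_table='my_faster_rcnn',
                       n_classes=3,
                       width=1000,
                       height=496,
                       backbone='vgg16')
detector.print_summary()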
Exemplo n.º 24
0
def segment_audio(path, local_path, data_path_after_caslib, segment_len,
                  framerate, sampwidth):
    """
    Segment the audio into pieces shorter than segment_len.

    Parameters
    ----------
    path : string
        Specifies path of the audio file.
    local_path : string
        Specifies the location where temporary segmented audio files are written (client side).
    data_path_after_caslib : string
        Specifies the location where temporary segmented audio files are stored (server side, relative to caslib).
        Note that local_path and data_path_after_caslib point to the same physical location.
    segment_len : float
        Specifies the maximum length of one segment in seconds.
    framerate : int
        Specifies the desired framerate.
    sampwidth : int
        Specifies the desired sampwidth.

    Returns
    -------
    listing_path_after_caslib : string
        Path of the file listing the audio segments on the server side, relative to caslib.
    listing_path_local : string
        Path of the file listing the audio segments on the client side.
    segment_path_after_caslib_list : list of string
        A list of paths of the audio segments on the server side, relative to caslib.
    segment_path_local_list : list of string
        A list of paths of the audio segments on client side.

    """

    if os.path.isfile(path):
        wave_reader, wave_params = read_audio(path)
    else:
        raise DLPyError("Cannot find the audio file.")

    if segment_len <= 0:
        raise DLPyError(
            "Incorrect \"segment_len\" value: the maximum segment length must be positive."
        )
    if segment_len > 35:
        raise DLPyError(
            "Incorrect \"segment_len\" value: the maximum segment length cannot exceed 35 seconds."
        )

    is_framerate_desired = check_framerate(wave_params, framerate)
    is_sampwidth_desired = check_sampwidth(wave_params, sampwidth)
    is_stereo = check_stereo(wave_params)

    # generate the listing file name
    audio_name = os.path.basename(path)
    audio_name = os.path.splitext(audio_name)[0]
    listing_name_no_ext = None
    listing_name = None
    while listing_name is None:
        listing_name_no_ext = random_name(audio_name, 6)
        listing_name = listing_name_no_ext + ".listing"
        listing_path_after_caslib = data_path_after_caslib + listing_name
        listing_path_local = os.path.join(local_path, listing_name)
        if os.path.exists(listing_path_local):
            listing_name = None

    # segmentation
    segment_nframes_list = calculate_segment_nframes(path, segment_len)
    print("Note:", str(len(segment_nframes_list)),
          "temporary audio files are created.")

    segment_path_after_caslib_list = []
    segment_path_local_list = []
    with open(listing_path_local, "w") as listing_file:
        wave_reader.rewind()
        for i in range(len(segment_nframes_list)):
            segment_name = listing_name_no_ext + "_" + str(i) + ".wav"
            segment_path_after_caslib = data_path_after_caslib + segment_name
            segment_path_local = os.path.join(local_path, segment_name)

            with wave.open(segment_path_local, "wb") as wave_writer:
                segment_path_after_caslib_list.append(
                    segment_path_after_caslib)
                segment_path_local_list.append(segment_path_local)
                wave_writer.setnchannels(1)
                wave_writer.setframerate(framerate)
                wave_writer.setsampwidth(sampwidth)
                wave_writer.setcomptype(wave_params.comptype,
                                        wave_params.compname)
                fragment = wave_reader.readframes(segment_nframes_list[i])
                if is_stereo:
                    fragment = convert_stereo_to_mono(fragment,
                                                      wave_params.sampwidth)

                if not is_framerate_desired:
                    fragment = convert_framerate(fragment,
                                                 wave_params.sampwidth, 1,
                                                 wave_params.framerate,
                                                 framerate)
                if not is_sampwidth_desired:
                    fragment = convert_sampwidth(fragment,
                                                 wave_params.sampwidth,
                                                 sampwidth)
                wave_writer.writeframes(fragment)
        wave_reader.close()

        for segment_path_after_caslib in segment_path_after_caslib_list:
            listing_file.write(segment_path_after_caslib + "\n")

    # listing_path_after_caslib: to load audio
    # listing_path_local: to remove listing file
    # segment_path_after_caslib_list: to concatenate results (add caslib path)
    # segment_path_local_list: to remove segmented files
    return listing_path_after_caslib, listing_path_local, segment_path_after_caslib_list, segment_path_local_list
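
# Hedged usage sketch (not part of the original example): splitting one long recording
# into mono segments of at most 30 seconds at 16 kHz / 16-bit. The two directories are
# placeholders and must refer to the same physical location, mounted on both the client
# and the CAS server.
listing_caslib, listing_local, seg_caslib_list, seg_local_list = segment_audio(
    path='/shared/audio/interview.wav',
    local_path='/shared/audio/tmp/',
    data_path_after_caslib='audio/tmp/',
    segment_len=30,
    framerate=16000,
    sampwidth=2)
print(len(seg_local_list), 'temporary segment files written')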