예제 #1
0
    def _get_array_attributes(self, prefix=''):
        """Describe the arrays of a single example before tfrecord conversion.

        Parameters
        ----------
        prefix : str
            Any additional prefix string/dictionary keys start with. Defaults
            to no additional prefix.

        Returns
        -------
        array_attributes : dict
            The result of passing the attribute dictionary and ``prefix``
            through ``self._pre``. The attribute dictionary has the keys
            'nums' and 'nans'; each value is a dict holding the 'shape',
            'tf_type', 'size', 'feature_func' and 'np_type' of that array
            for a single example.

        """
        # Both arrays hold one value per column.
        num_cols = len(self.cols)

        att_dict = {
            'nums': {
                'shape': [num_cols],
                'tf_type': feat.select_tf_dtype(self.dtype),
                'size': feat.size_from_shape([num_cols]),
                'feature_func': feat.select_feature_func(self.dtype),
                'np_type': self.dtype,
            },
            'nans': {
                'shape': [num_cols],
                'tf_type': tf.int64,
                'size': feat.size_from_shape([num_cols]),
                'feature_func': feat._int_feat,
                # The ``np.bool`` alias was removed in NumPy 1.24; the
                # builtin ``bool`` is the type that alias referred to.
                'np_type': bool,
            },
        }

        return self._pre(att_dict, prefix)
    def _get_array_attributes(self, prefix=''):
        """Describe the arrays of a single example before tfrecord conversion.

        Parameters
        ----------
        prefix : str
            Any additional prefix string/dictionary keys start with. Defaults
            to no additional prefix.

        Returns
        -------
        dict
            The result of passing the attribute dictionary and ``prefix``
            through ``self._pre``. The attribute dictionary always has a
            'sentences' entry and, when ``self.keep_dims`` is falsy, an 'ids'
            entry as well; each value is a dict holding the 'shape',
            'tf_type', 'size', 'feature_func' and 'np_type' of that array
            for a single example.

        """
        # Input shape without its leading dim, plus a trailing max_doc_len dim.
        full_shape = list(self.input_shape[1:]) + [self.max_doc_len]

        if self.keep_dims:
            sentences_shape = full_shape
            sentences_size = feat.size_from_shape(full_shape)
        else:
            # Without keep_dims each example carries a single element.
            sentences_shape = [1]
            sentences_size = 1

        att_dict = {
            'sentences': {
                'shape': sentences_shape,
                'tf_type': tf.string,
                'size': sentences_size,
                'feature_func': feat._bytes_feat,
                # The ``np.unicode`` alias was removed in NumPy 1.24; the
                # builtin ``str`` is the type that alias referred to.
                'np_type': str,
            }
        }

        if not self.keep_dims:
            att_dict['ids'] = {
                'shape': [],
                'tf_type': tf.string,
                'size': 1,
                'feature_func': feat._bytes_feat,
                'np_type': str,
            }

        return self._pre(att_dict, prefix)
예제 #3
0
    def _get_array_attributes(self, prefix=''):
        """Describe the arrays of a single example before tfrecord conversion.

        Parameters
        ----------
        prefix : str
            Any additional prefix string/dictionary keys start with. Defaults
            to no additional prefix.

        Returns
        -------
        dict
            The result of passing the attribute dictionary and ``prefix``
            through ``self._pre``. The attribute dictionary has the keys
            'missing_vals', 'one_hots' and 'indices'; each value is a dict
            holding the 'shape', 'tf_type', 'size', 'feature_func' and
            'np_type' of that array for a single example.

        """
        # Input shape with its leading dimension dropped.
        example_dims = self.input_shape[1:]

        # One-hot arrays gain a trailing dim with one slot per category value.
        num_cats = len(self.index_to_cat_val)
        one_hots_dims = list(example_dims) + [num_cats]

        missing_vals_atts = {
            'shape': list(example_dims),
            'tf_type': feat.select_tf_dtype(self.input_dtype),
            'size': feat.size_from_shape(example_dims),
            'feature_func': feat.select_feature_func(self.input_dtype),
            'np_type': self.input_dtype,
        }
        one_hots_atts = {
            'shape': one_hots_dims,
            'tf_type': feat.select_tf_dtype(self.dtype),
            'size': feat.size_from_shape(one_hots_dims),
            'feature_func': feat.select_feature_func(self.dtype),
            'np_type': self.dtype,
        }
        indices_atts = {
            'shape': list(example_dims),
            'tf_type': tf.int64,
            'size': feat.size_from_shape(example_dims),
            'feature_func': feat._int_feat,
            'np_type': np.int64,
        }

        collected = {
            'missing_vals': missing_vals_atts,
            'one_hots': one_hots_atts,
            'indices': indices_atts,
        }
        return self._pre(collected, prefix)
예제 #4
0
    def _get_array_attributes(self, prefix=''):
        """Describe the arrays of a single example before tfrecord conversion.

        Parameters
        ----------
        prefix : str
            Any additional prefix string/dictionary keys start with. Defaults
            to no additional prefix.

        Returns
        -------
        array_attributes : dict
            The result of passing the attribute dictionary and ``prefix``
            through ``self._pre``. The attribute dictionary has one entry per
            key returned by ``self._get_array_keys()``; each value is a dict
            holding the 'shape', 'tf_type', 'size', 'feature_func' and
            'np_type' of that array for a single example.

        """
        # Original array's shape, except for the batch dim.
        base_shape = [len(self.cols)]

        att_dict = {}
        # One entry for every key expected from the example dicts.
        for key in self._get_array_keys():
            # 'tokenize_diff' keeps the base shape; every other array gets an
            # additional trailing max_sent_len dim.
            if key == 'tokenize_diff':
                cur_shape = base_shape
            else:
                cur_shape = base_shape + [self.max_sent_len]

            # Only 'indices' is stored as integers; all other arrays are
            # stored as strings.
            is_indices = key == 'indices'

            att_dict[key] = {
                # Copy so entries never share a list with each other.
                'shape': list(cur_shape),
                'tf_type': tf.int64 if is_indices else tf.string,
                'size': feat.size_from_shape(cur_shape),
                'feature_func':
                feat._int_feat if is_indices else feat._bytes_feat,
                # The ``np.unicode`` alias was removed in NumPy 1.24; the
                # builtin ``str`` is the type that alias referred to.
                'np_type': np.int64 if is_indices else str,
            }

        return self._pre(att_dict, prefix)