def _get_array_attributes(self, prefix=''):
    """Get the attributes of the arrays written into tfrecord examples.

    Two arrays are described, 'nums' and 'nans'. Both have one value per
    entry of ``self.cols`` for a single example.

    Parameters
    ----------
    prefix : str
        Any additional prefix string/dictionary keys start with. Defaults to
        no additional prefix.

    Returns
    -------
    array_attributes : dict
        The result of ``self._pre(att_dict, prefix)``, where ``att_dict`` has
        the keys 'nums' and 'nans'. Each value built here is a dict with the
        keys 'shape', 'tf_type', 'size', 'feature_func' and 'np_type'
        describing the array of a single example.

    """
    num_cols = len(self.cols)

    att_dict = {}
    # 'nums' takes its tensorflow type and feature function from self.dtype.
    att_dict['nums'] = {
        'shape': [num_cols],
        'tf_type': feat.select_tf_dtype(self.dtype),
        'size': feat.size_from_shape([num_cols]),
        'feature_func': feat.select_feature_func(self.dtype),
        'np_type': self.dtype
    }
    # 'nans' is written as int64 features but declared as boolean on the
    # numpy side. The builtin ``bool`` is used because the ``np.bool`` alias
    # (which was the builtin) has been removed from NumPy.
    att_dict['nans'] = {
        'shape': [num_cols],
        'tf_type': tf.int64,
        'size': feat.size_from_shape([num_cols]),
        'feature_func': feat._int_feat,
        'np_type': bool
    }

    att_dict = self._pre(att_dict, prefix)
    return att_dict
def _get_array_attributes(self, prefix=''):
    """Get the attributes of the arrays written into tfrecord examples.

    A 'sentences' array is always described. When ``self.keep_dims`` is
    falsy an additional scalar 'ids' array is described as well.

    Parameters
    ----------
    prefix : str
        Any additional prefix string/dictionary keys start with. Defaults to
        no additional prefix.

    Returns
    -------
    dict
        The result of ``self._pre(att_dict, prefix)``. Each value built here
        is a dict with the keys 'shape', 'tf_type', 'size', 'feature_func'
        and 'np_type' describing the array of a single example.

    """
    # Input shape without its first dimension, plus a trailing dimension of
    # length max_doc_len.
    shape = list(self.input_shape[1:]) + [self.max_doc_len]

    if self.keep_dims:
        sentences_shape = shape
        sentences_size = feat.size_from_shape(shape)
    else:
        # Without keep_dims each example holds a single sentence.
        sentences_shape = [1]
        sentences_size = 1

    att_dict = {}
    # The builtin ``str`` is used for 'np_type' because the ``np.unicode``
    # alias (which was the builtin ``str``) has been removed from NumPy.
    att_dict['sentences'] = {
        'shape': sentences_shape,
        'tf_type': tf.string,
        'size': sentences_size,
        'feature_func': feat._bytes_feat,
        'np_type': str
    }

    if not self.keep_dims:
        att_dict['ids'] = {
            'shape': [],
            'tf_type': tf.string,
            'size': 1,
            'feature_func': feat._bytes_feat,
            'np_type': str
        }

    att_dict = self._pre(att_dict, prefix)
    return att_dict
def _get_array_attributes(self, prefix=''):
    """Get the attributes of the arrays written into tfrecord examples.

    Three arrays are described: 'missing_vals', 'one_hots' and 'indices'.

    Parameters
    ----------
    prefix : str
        Any additional prefix string/dictionary keys start with. Defaults to
        no additional prefix.

    Returns
    -------
    dict
        The result of ``self._pre`` applied to the attribute dictionary and
        ``prefix``. Each value built here is a dict with the keys 'shape',
        'tf_type', 'size', 'feature_func' and 'np_type' describing the array
        of a single example.

    """
    # Input shape without its first dimension.
    example_dims = self.input_shape[1:]
    # The one hot array gets one extra trailing slot per known category value.
    with_category_axis = list(example_dims) + [len(self.index_to_cat_val)]

    attributes = {
        # Typed after the input array.
        'missing_vals': {
            'shape': list(example_dims),
            'tf_type': feat.select_tf_dtype(self.input_dtype),
            'size': feat.size_from_shape(example_dims),
            'feature_func': feat.select_feature_func(self.input_dtype),
            'np_type': self.input_dtype
        },
        # Typed after self.dtype.
        'one_hots': {
            'shape': with_category_axis,
            'tf_type': feat.select_tf_dtype(self.dtype),
            'size': feat.size_from_shape(with_category_axis),
            'feature_func': feat.select_feature_func(self.dtype),
            'np_type': self.dtype
        },
        # Always 64 bit integers.
        'indices': {
            'shape': list(example_dims),
            'tf_type': tf.int64,
            'size': feat.size_from_shape(example_dims),
            'feature_func': feat._int_feat,
            'np_type': np.int64
        },
    }

    return self._pre(attributes, prefix)
def _get_array_attributes(self, prefix=''):
    """Get the attributes of the arrays written into tfrecord examples.

    One entry is built for every key returned by ``self._get_array_keys()``.
    The 'indices' key is described as int64, every other key as a string.

    Parameters
    ----------
    prefix : str
        Any additional prefix string/dictionary keys start with. Defaults to
        no additional prefix.

    Returns
    -------
    array_attributes : dict
        The result of ``self._pre(att_dict, prefix)``. Each value built here
        is a dict with the keys 'shape', 'tf_type', 'size', 'feature_func'
        and 'np_type' describing the array of a single example.

    """
    # Create the list of all the keys expected from the example_dicts
    array_keys = self._get_array_keys()

    # Get the original array's shape, except for the batch dim.
    shape = [len(self.cols)]

    att_dict = {}
    for key in array_keys:
        # 'tokenize_diff' keeps the per-column shape; every other key gets an
        # extra trailing dimension of length max_sent_len.
        # NOTE(review): the previous comment described the opposite
        # assignment. The code's behavior is kept unchanged here; confirm
        # which one was intended.
        if key == 'tokenize_diff':
            cur_shape = shape
        else:
            cur_shape = shape + [self.max_sent_len]

        is_indices = key == 'indices'
        att_dict[key] = {
            'shape': list(cur_shape),
            'tf_type': tf.int64 if is_indices else tf.string,
            'size': feat.size_from_shape(cur_shape),
            'feature_func': feat._int_feat if is_indices else feat._bytes_feat,
            # The builtin ``str`` replaces the ``np.unicode`` alias (which was
            # the builtin ``str``) that has been removed from NumPy.
            'np_type': np.int64 if is_indices else str
        }

    att_dict = self._pre(att_dict, prefix)
    return att_dict