def _write_numpy_to_dense_tensor(file, array, labels=None):
    """Write a 2-D numpy array to ``file`` as dense records, one per row.

    Args:
        file (file-like object): file-like object where the records will be
            written
        array (numpy array): 2-D numpy array containing the features; every
            row becomes one record
        labels (numpy array): optional 1-D numpy array containing exactly one
            label per row of ``array``

    Raises:
        ValueError: if ``array`` is not 2-D, if ``labels`` is not 1-D, or if
            the number of labels differs from the number of rows of ``array``.
    """
    # Validate shape of array and labels, resolve array and label types
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        # Labels are looked up by row index below, so their count must equal
        # the row count. Accepting a match on the column count would either
        # pair rows with the wrong labels or overrun the label vector.
        if labels.shape[0] != array.shape[0]:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    # Write each vector in array into a Record in the file object.
    # A single Record is reused and cleared at the start of every iteration.
    record = Record()
    for index, vector in enumerate(array):
        record.Clear()
        _write_feature_tensor(resolved_type, record, vector)
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[index])
        _write_recordio(file, record.SerializeToString())
def _write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Write a scipy sparse matrix to ``file`` as sparse records, one per row.

    The matrix is converted to CSR. For every row the stored values, their
    column indices (as uint64 keys), the optional label and the column count
    (as the shape) are written into one record.

    Args:
        file (file-like object): file-like object where the records will be
            written
        array (array-like): a 2-D sparse matrix containing features
        labels (numpy array): optional 1-D numpy array containing exactly one
            label per row of ``array``

    Raises:
        TypeError: if ``array`` is not a scipy sparse matrix.
        ValueError: if ``array`` is not 2-D, if ``labels`` is not 1-D, or if
            the number of labels differs from the number of rows of ``array``.
    """
    if not issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shape of array and labels, resolve array and label types
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        # Labels are looked up by row index below, so their count must equal
        # the row count. Accepting a match on the column count would either
        # pair rows with the wrong labels or overrun the label vector.
        if labels.shape[0] != array.shape[0]:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    csr_array = array.tocsr()
    n_rows, n_cols = csr_array.shape

    # A single Record is reused and cleared at the start of every iteration.
    record = Record()
    for row_idx in range(n_rows):
        record.Clear()
        row = csr_array.getrow(row_idx)
        # Write values
        _write_feature_tensor(resolved_type, record, row.data)
        # Write keys
        _write_keys_tensor(resolved_type, record, row.indices.astype(np.uint64))
        # Write labels
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_idx])
        # Write shape
        _write_shape(resolved_type, record, n_cols)
        _write_recordio(file, record.SerializeToString())
def _encode_selected_predictions_recordio_protobuf(predictions):
    """Encode predictions in recordio-protobuf format.

    For each prediction, a new record is created and written as its own
    recordio frame. The content is populated under the "label" field of a
    record where the keys are derived from the selected content keys. Every
    value is encoded to a float32 tensor.

    :param predictions: output of serve_utils.get_selected_predictions(...) (list of dict)
    :return: predictions in recordio-protobuf response format (bytes)
    """
    recordio_bio = io.BytesIO()
    record = Record()
    for item in predictions:
        # Start every prediction from an empty message so that no key written
        # for the previous prediction leaks into this one.
        record.Clear()
        for key, raw_value in item.items():
            # Scalars are wrapped so that every value is handed over as a list.
            value = raw_value if isinstance(raw_value, list) else [raw_value]
            _write_record(record, key, value)
        # Frame only this prediction's serialized bytes. Routing them through
        # a buffer shared across iterations would make each frame also carry
        # the bytes of all earlier predictions.
        _write_recordio(recordio_bio, record.SerializeToString())
    return recordio_bio.getvalue()