def test_array_to_recordio_dense():
    """A dense float64 matrix round-trips through the recordio-protobuf encoder."""
    rows = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    encoded = _encoders.array_to_recordio_protobuf(np.array(rows))
    # One serialized Record is expected per input row, in order.
    for raw_record, expected_row in zip(_read_recordio(encoded), rows):
        parsed = Record()
        parsed.ParseFromString(raw_record)
        assert parsed.features["values"].float64_tensor.values == expected_row
def _write_numpy_to_dense_tensor(file, array, labels=None):
    """Writes a numpy array to a dense record

    Args:
        file (file-like object): file-like object where the records will be written
        array (numpy array): numpy array containing the features
        labels (numpy array): numpy array containing the labels
    """
    # Validate shapes up front and resolve the element types before any
    # records are written, so a bad call leaves the file untouched.
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    resolved_label_type = None
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    # Reuse a single Record, clearing it between rows, and emit one
    # recordio-framed entry per row of the matrix.
    record = Record()
    for row_index, row in enumerate(array):
        record.Clear()
        _write_feature_tensor(resolved_type, record, row)
        if resolved_label_type is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_index])
        _write_recordio(file, record.SerializeToString())
def test_encode_selected_predictions_protobuf():
    """Selected predictions encode to one recordio-protobuf record per prediction."""
    want_labels = [[1], [0]]
    want_probabilities = [[0.4, 0.6], [0.9, 0.1]]
    payload = serve_utils.encode_selected_predictions(
        TEST_PREDICTIONS, TEST_KEYS, "application/x-recordio-protobuf")
    stream = io.BytesIO(payload)
    for raw, label, probs in zip(_read_recordio(stream), want_labels, want_probabilities):
        parsed = Record()
        parsed.ParseFromString(raw)
        assert parsed.label["predicted_label"].float32_tensor.values == label
        # Probabilities are floats, so compare with a tolerance.
        assert all(np.isclose(parsed.label["probabilities"].float32_tensor.values, probs))
def _write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Writes a scipy sparse matrix to a sparse tensor.

    Args:
        file (file-like object): file-like object where the records will be written
        array (array-like): a sparse matrix containing features
        labels (numpy array): numpy array containing the labels
    """
    if not issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shapes up front and resolve element types before emitting
    # anything, so invalid input never produces a partial file.
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    resolved_label_type = None
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    # CSR gives cheap per-row access to the nonzero values and their columns.
    csr_array = array.tocsr()
    n_rows, n_cols = csr_array.shape

    record = Record()
    for row_idx in range(n_rows):
        record.Clear()
        row = csr_array.getrow(row_idx)
        # Nonzero values for this row.
        _write_feature_tensor(resolved_type, record, row.data)
        # Column indices of those values, as unsigned 64-bit keys.
        _write_keys_tensor(resolved_type, record, row.indices.astype(np.uint64))
        # Optional per-row label.
        if resolved_label_type is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_idx])
        # Full row width so the consumer can reconstruct the dense shape.
        _write_shape(resolved_type, record, n_cols)
        _write_recordio(file, record.SerializeToString())
def _encode_selected_predictions_recordio_protobuf(predictions):
    """Encode predictions in recordio-protobuf format.

    For each prediction, a new record is created. The content is populated under the
    "label" field of a record where the keys are derived from the selected content keys.
    Every value is encoded to a float32 tensor.

    :param predictions: output of serve_utils.get_selected_predictions(...) (list of dict)
    :return: predictions in recordio-protobuf response format (bytes)
    """
    recordio_bio = io.BytesIO()
    record = Record()
    for item in predictions:
        for key in item.keys():
            # Scalars are wrapped in a single-element list so every value is a tensor.
            value = item[key] if type(item[key]) is list else [item[key]]
            _write_record(record, key, value)
        # BUG FIX: each serialized Record must be framed as its own recordio
        # entry. The previous code concatenated every SerializeToString() into
        # one buffer and framed it once after the loop, which produced a single
        # recordio entry; parsing concatenated protobuf messages merges them
        # (last map key wins), so consumers saw one merged record instead of
        # one record per prediction.
        _write_recordio(recordio_bio, record.SerializeToString())
        record.Clear()
    return recordio_bio.getvalue()
def test_sparse_float32_write_spmatrix_to_sparse_tensor():
    """A float32 COO sparse matrix round-trips values, keys, and shape."""
    n_cols = 4
    values_per_row = [[1.0, 2.0],
                      [10.0, 30.0],
                      [100.0, 200.0, 300.0, 400.0],
                      [1000.0, 2000.0, 3000.0]]
    cols_per_row = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    # Flatten the per-row data into COO (data, (row, col)) triples.
    flat_values = list(itertools.chain.from_iterable(values_per_row))
    flat_cols = list(itertools.chain.from_iterable(cols_per_row))
    flat_rows = list(itertools.chain.from_iterable(
        [row] * len(cols) for row, cols in enumerate(cols_per_row)))

    matrix = sparse.coo_matrix((flat_values, (flat_rows, flat_cols)), dtype="float32")
    encoded = _encoders.array_to_recordio_protobuf(matrix)

    for raw, expected_values, expected_keys in zip(
            _read_recordio(encoded), values_per_row, cols_per_row):
        parsed = Record()
        parsed.ParseFromString(raw)
        assert parsed.features["values"].float32_tensor.values == expected_values
        assert parsed.features["values"].float32_tensor.keys == expected_keys
        assert parsed.features["values"].float32_tensor.shape == [n_cols]