# Shared imports assumed by the snippets below; each function originally
# relied on module-level imports of the tf.train protobuf wrappers.
import os
from csv import DictReader

import numpy as np
import tensorflow as tf
from tensorflow.io import TFRecordWriter
from tensorflow.train import (BytesList, Example, Feature, Features,
                              FloatList, Int64List)


def rollout_to_protobuf(document_words, document_tags):
    """Tokenizes one document and writes it as a tf.train.Example.

    Relies on a module-level `tokenize` helper and an open `writer`
    (a tf.io.TFRecordWriter).
    """
    input_ids, input_masks, y_masks, text, ys = tokenize(
        [document_words], [document_tags])
    example_proto = Example(features=Features(
        feature={
            'text': Feature(bytes_list=BytesList(
                value=[w.encode('utf-8') for w in text[0]])),
            'input_ids': Feature(int64_list=Int64List(value=input_ids)),
            'input_masks': Feature(int64_list=Int64List(value=input_masks)),
            'y_masks': Feature(int64_list=Int64List(value=y_masks)),
            'labels': Feature(int64_list=Int64List(value=ys)),
        }))
    writer.write(example_proto.SerializeToString())
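# Illustrative read-side counterpart (a sketch, not from the source): how
# records written by rollout_to_protobuf could be parsed back. The feature
# spec mirrors the feature names above; sequence lengths are assumed
# variable, hence VarLenFeature.
def parse_rollout_example(serialized):
    spec = {
        'text': tf.io.VarLenFeature(tf.string),
        'input_ids': tf.io.VarLenFeature(tf.int64),
        'input_masks': tf.io.VarLenFeature(tf.int64),
        'y_masks': tf.io.VarLenFeature(tf.int64),
        'labels': tf.io.VarLenFeature(tf.int64),
    }
    parsed = tf.io.parse_single_example(serialized, spec)
    # VarLenFeature yields SparseTensors; densify for convenience.
    return {k: tf.sparse.to_dense(v) for k, v in parsed.items()}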
def make_tf_examples(string_features, int_features, labels):
    """Packs parallel string/int feature lists into per-instance Examples.

    `zero_norm_labels` and `split_list` are external helpers from the
    surrounding module.
    """
    int_features += [[label] for label in zero_norm_labels(labels)]
    string_features = [
        Feature(bytes_list=BytesList(value=val)) for val in string_features
    ]
    int_features = [
        Feature(int64_list=Int64List(value=val)) for val in int_features
    ]
    all_features = string_features + int_features
    return [
        Example(features=Features(
            feature={
                "left": left,
                "target": target,
                "right": right,
                "left_ids": left_ids,
                "target_ids": target_ids,
                "right_ids": right_ids,
                "labels": label,
            }))
        for (
            left,
            target,
            right,
            left_ids,
            target_ids,
            right_ids,
            label,
        ) in zip(*split_list(all_features, parts=7))
    ]
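# Hypothetical sketch of the `split_list` helper assumed above (the real
# implementation lives elsewhere in the source module): split a flat list
# into `parts` contiguous chunks of equal length, so that zip(*chunks)
# yields one (left, target, right, ..., label) tuple per instance.
def split_list(flat, parts):
    size = len(flat) // parts
    return [flat[i * size:(i + 1) * size] for i in range(parts)]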
def create_examples(data,
                    bert_client,
                    training=True,
                    label2int=None,
                    class_weight=None):
    """Yields one tf.train.Example per row of `data`.

    Args:
        data: pd.DataFrame with 'title1_en' and 'title2_en' columns.
        label2int: dict mapping label strings to ints (training only).
        class_weight: list of per-class weights (training only).
    """
    idx_start = data.index[0]
    A_encoded = bert_client.encode(data['title1_en'].tolist())
    B_encoded = bert_client.encode(data['title2_en'].tolist())
    for i in range(len(data)):
        feature = {
            'A_encoded': Feature(float_list=FloatList(value=A_encoded[i])),
            'B_encoded': Feature(float_list=FloatList(value=B_encoded[i]))
        }
        if training:
            label = label2int[data.loc[idx_start + i, 'label']]
            feature['label'] = Feature(int64_list=Int64List(value=[label]))
            feature['class_weight'] = Feature(float_list=FloatList(
                value=[class_weight[label]]))
        else:
            feature['id'] = Feature(int64_list=Int64List(
                value=[data.loc[idx_start + i, 'id']]))
        yield Example(features=Features(feature=feature))
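# Illustrative usage (assumed, not from the source): drain the generator
# into a TFRecord file. `df`, `bc`, the label mapping, and the weights are
# placeholders standing in for a real DataFrame, a bert-serving client, and
# dataset-specific values.
with tf.io.TFRecordWriter('train.tfrecord') as writer:
    for example in create_examples(df, bc, training=True,
                                   label2int={'agreed': 0, 'disagreed': 1},
                                   class_weight=[1.0, 1.0]):
        writer.write(example.SerializeToString())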
def serialise_traj(data):
    # Serialize every tensor-valued entry as bytes; `seq_lens` is the one
    # scalar and is stored as an int64 instead.
    features = {
        k: Feature(bytes_list=BytesList(
            value=[tf.io.serialize_tensor(v).numpy()]))
        for k, v in data.items() if k not in ['seq_lens']
    }
    features['seq_lens'] = Feature(int64_list=Int64List(
        value=[data['seq_lens']]))
    example = Example(features=Features(feature=features))
    return example.SerializeToString()
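# Hedged read-side sketch (assumed, not from the source): parse one record
# written by serialise_traj back into tensors. `keys_and_dtypes` is a
# hypothetical map from feature name to the dtype each tensor had before
# serialization.
def parse_traj(serialized, keys_and_dtypes):
    spec = {k: tf.io.FixedLenFeature([], tf.string) for k in keys_and_dtypes}
    spec['seq_lens'] = tf.io.FixedLenFeature([], tf.int64)
    parsed = tf.io.parse_single_example(serialized, spec)
    out = {k: tf.io.parse_tensor(parsed[k], out_type=dtype)
           for k, dtype in keys_and_dtypes.items()}
    out['seq_lens'] = parsed['seq_lens']
    return out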
def serialise_vid(data):
    # data keys: seq_lens, masks, imgs, goal_imgs, label, label_embedding, tag
    features = {
        k: Feature(bytes_list=BytesList(
            value=[tf.io.serialize_tensor(v).numpy()]))
        for k, v in data.items() if k not in ['seq_lens']
    }
    features['seq_lens'] = Feature(int64_list=Int64List(
        value=[data['seq_lens']]))
    example = Example(features=Features(feature=features))
    return example.SerializeToString()
def save(mask, img, save_dir, count=0):
    # JPEG-encode both tensors and store them as bytes features, one
    # TFRecord file per (image, mask) pair.
    mask = tf.io.encode_jpeg(mask)
    img = tf.io.encode_jpeg(img)
    image_buffer = Example(features=Features(
        feature={
            "Image": Feature(bytes_list=BytesList(value=[img.numpy()])),
            "Mask": Feature(bytes_list=BytesList(value=[mask.numpy()]))
        }))
    with tf.io.TFRecordWriter(
            os.path.join(save_dir, f"Data-{count}.tfrecord")) as f:
        f.write(image_buffer.SerializeToString())
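# Illustrative read-back sketch (assumed, not from the source): decode one
# record written by save() into image and mask tensors.
def load_image_mask(record_path):
    raw = next(iter(tf.data.TFRecordDataset(record_path)))
    parsed = tf.io.parse_single_example(raw, {
        "Image": tf.io.FixedLenFeature([], tf.string),
        "Mask": tf.io.FixedLenFeature([], tf.string),
    })
    return (tf.io.decode_jpeg(parsed["Image"]),
            tf.io.decode_jpeg(parsed["Mask"]))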
def create_example(features: np.ndarray, label: np.int32):
    return Example(features=Features(
        feature={
            "features": NumpyToRecordConverter._bytes_feature(
                tf.io.serialize_tensor(features)),
            "label": Feature(int64_list=Int64List(value=[label]))
        })).SerializeToString()
def __encode_input(self, mr, input_encoder):
    """Encodes the input and creates a TF Example record out of it."""
    input_ids = input_encoder.encode(mr)
    input_ids.append(text_encoder.EOS_ID)
    features = {'inputs': Feature(int64_list=Int64List(value=input_ids))}
    example = Example(features=Features(feature=features))
    return example.SerializeToString()
def get_cycle_example(cell_value, summary_idx, cycle_idx, scaling_factors):
    """Defines the columns that should be written to tfrecords and converts
    the raw data to "Example" objects. Every Example contains data from one
    charging cycle. The data is scaled (divided) by the corresponding values
    in "scaling_factors". `cst` is an external module of column-name
    constants.
    """
    # Summary feature values (scalars --> have to be wrapped in lists)
    ir_value = [cell_value["summary"][cst.INTERNAL_RESISTANCE_NAME][summary_idx]
                / scaling_factors[cst.INTERNAL_RESISTANCE_NAME]]
    qd_value = [cell_value["summary"][cst.QD_NAME][summary_idx]
                / scaling_factors[cst.QD_NAME]]
    rc_value = [cell_value["summary"][cst.REMAINING_CYCLES_NAME][summary_idx]
                / scaling_factors[cst.REMAINING_CYCLES_NAME]]
    dt_value = [cell_value["summary"][cst.DISCHARGE_TIME_NAME][summary_idx]
                / scaling_factors[cst.DISCHARGE_TIME_NAME]]
    # Same scale --> same scaling factor
    cc_value = [float(cycle_idx) / scaling_factors[cst.REMAINING_CYCLES_NAME]]
    # Detail feature values (arrays)
    qdlin_value = (cell_value["cycles"][cycle_idx][cst.QDLIN_NAME]
                   / scaling_factors[cst.QDLIN_NAME])
    tdlin_value = (cell_value["cycles"][cycle_idx][cst.TDLIN_NAME]
                   / scaling_factors[cst.TDLIN_NAME])
    # Wrapping as Example
    cycle_example = Example(features=Features(
        feature={
            cst.INTERNAL_RESISTANCE_NAME:
                Feature(float_list=FloatList(value=ir_value)),
            cst.QD_NAME:
                Feature(float_list=FloatList(value=qd_value)),
            cst.REMAINING_CYCLES_NAME:
                Feature(float_list=FloatList(value=rc_value)),
            cst.DISCHARGE_TIME_NAME:
                Feature(float_list=FloatList(value=dt_value)),
            cst.QDLIN_NAME:
                Feature(float_list=FloatList(value=qdlin_value)),
            cst.TDLIN_NAME:
                Feature(float_list=FloatList(value=tdlin_value)),
            cst.CURRENT_CYCLE_NAME:
                Feature(float_list=FloatList(value=cc_value))
        }))
    return cycle_example
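# Hedged parsing sketch (assumed, not from the source): a feature spec for
# the cycle Examples above. Scalar features are FixedLen of length 1; the
# qdlin/tdlin arrays are read as VarLenFeature since their interpolation
# length is not fixed here.
def parse_cycle_example(serialized):
    spec = {
        cst.INTERNAL_RESISTANCE_NAME: tf.io.FixedLenFeature([1], tf.float32),
        cst.QD_NAME: tf.io.FixedLenFeature([1], tf.float32),
        cst.REMAINING_CYCLES_NAME: tf.io.FixedLenFeature([1], tf.float32),
        cst.DISCHARGE_TIME_NAME: tf.io.FixedLenFeature([1], tf.float32),
        cst.CURRENT_CYCLE_NAME: tf.io.FixedLenFeature([1], tf.float32),
        cst.QDLIN_NAME: tf.io.VarLenFeature(tf.float32),
        cst.TDLIN_NAME: tf.io.VarLenFeature(tf.float32),
    }
    return tf.io.parse_single_example(serialized, spec)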
# Imports assumed for the serving client below (module-level in the source;
# `FeatureProvider` and `parse_args` are external to this snippet):
from grpc import insecure_channel
from tensorflow import make_tensor_proto, string as tf_string
from tensorflow_serving.apis import prediction_service_pb2_grpc
from tensorflow_serving.apis.classification_pb2 import ClassificationRequest
from tensorflow_serving.apis.input_pb2 import ExampleList, Input
from tensorflow_serving.apis.predict_pb2 import PredictRequest


def main():
    model, signature, batch_file_path, sentence, target = parse_args()
    feat_dict = {"sentences": [], "targets": []}
    if batch_file_path is not None:
        with open(batch_file_path, "r") as batch_file:
            fieldnames = ["target", "sentence"]
            csvreader = DictReader(batch_file, fieldnames=fieldnames)
            for row in csvreader:
                feat_dict["targets"].append(row["target"].strip())
                feat_dict["sentences"].append(row["sentence"].strip())
    else:
        feat_dict["targets"].append(target)
        feat_dict["sentences"].append(sentence)

    l_ctxts, trgs, r_ctxts = FeatureProvider.partition_sentences(
        sentences=feat_dict["sentences"],
        targets=feat_dict["targets"],
        offsets=FeatureProvider.get_target_offset_array(feat_dict),
    )
    l_enc = [FeatureProvider.tf_encode_tokens(tokens)
             for tokens in FeatureProvider.tokenize_phrases(l_ctxts)]
    trg_enc = [FeatureProvider.tf_encode_tokens(tokens)
               for tokens in FeatureProvider.tokenize_phrases(trgs)]
    r_enc = [FeatureProvider.tf_encode_tokens(tokens)
             for tokens in FeatureProvider.tokenize_phrases(r_ctxts)]

    # Keep both the Example protos (for the classification request) and
    # their serialized forms (for the prediction request).
    tf_example_protos = []
    tf_examples = []
    for left, target, right in zip(l_enc, trg_enc, r_enc):
        features = Features(
            feature={
                "left": Feature(bytes_list=BytesList(value=left)),
                "target": Feature(bytes_list=BytesList(value=target)),
                "right": Feature(bytes_list=BytesList(value=right)),
            })
        tf_example = Example(features=features)
        tf_example_protos.append(tf_example)
        tf_examples.append(tf_example.SerializeToString())

    tensor_proto = make_tensor_proto(
        tf_examples, dtype=tf_string, shape=[len(tf_examples)])

    channel = insecure_channel("127.0.0.1:8500")
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    # CLASSIFICATION
    classification_req = ClassificationRequest()
    inputs = Input(example_list=ExampleList(examples=tf_example_protos))
    classification_req.input.CopyFrom(inputs)  # pylint: disable=E1101
    classification_req.model_spec.name = "lg"  # pylint: disable=E1101
    classification = stub.Classify(classification_req, 60.0)
    print(classification)

    # PREDICTION
    prediction_req = PredictRequest()
    prediction_req.inputs["instances"].CopyFrom(  # pylint: disable=E1101
        tensor_proto)
    prediction_req.model_spec.signature_name = signature  # pylint: disable=E1101
    prediction_req.model_spec.name = model  # pylint: disable=E1101
    prediction = stub.Predict(prediction_req, 60.0)
    print(prediction)
def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))):
        # BytesList won't unpack a string from an EagerTensor.
        value = value.numpy()
    return Feature(bytes_list=BytesList(value=[value]))
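# Companion helpers in the same spirit (assumed; they follow the standard
# tf.train feature-wrapping pattern rather than coming from the source).
def _float_feature(value):
    """Returns a float_list from a float / double."""
    return Feature(float_list=FloatList(value=[value]))


def _int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return Feature(int64_list=Int64List(value=[value]))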
def convert_to_example(adj, feature, label_data=None, label_mask=None):
    """Serializes graph data (sparse adjacency and feature matrices) into a
    tf.train.Example byte string."""
    adj_row, adj_col = np.nonzero(adj)
    adj_values = adj[adj_row, adj_col]
    adj_elem_len = len(adj_row)
    degrees = np.sum(adj, 0)
    adj_degrees = []
    for ar, ac in zip(adj_row, adj_col):
        if ar == ac:
            adj_degrees.append(0)
        else:
            adj_degrees.append(int(degrees[ar]))
    feature = np.array(feature)
    feature_row, feature_col = np.nonzero(feature)
    feature_values = feature[feature_row, feature_col]
    feature_elem_len = len(feature_row)
    feature_dict = {
        'adj_row': Feature(int64_list=Int64List(value=list(adj_row))),
        'adj_column': Feature(int64_list=Int64List(value=list(adj_col))),
        'adj_values': Feature(float_list=FloatList(value=list(adj_values))),
        'adj_elem_len': Feature(int64_list=Int64List(value=[adj_elem_len])),
        'adj_degrees': Feature(int64_list=Int64List(value=adj_degrees)),
        'feature_row': Feature(int64_list=Int64List(value=list(feature_row))),
        'feature_column': Feature(int64_list=Int64List(value=list(feature_col))),
        'feature_values': Feature(float_list=FloatList(value=list(feature_values))),
        'feature_elem_len': Feature(int64_list=Int64List(value=[feature_elem_len])),
        'size': Feature(int64_list=Int64List(value=list(feature.shape)))
    }
    if label_data is not None:
        label_data = np.nan_to_num(label_data)
        feature_dict['label'] = Feature(int64_list=Int64List(
            value=label_data.astype(int)))
        feature_dict['mask_label'] = Feature(int64_list=Int64List(
            value=label_mask.astype(int)))
    features = Features(feature=feature_dict)
    ex = Example(features=features)
    return ex.SerializeToString()
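# Hedged read-side sketch (assumed, not from the source): rebuild the dense
# adjacency matrix from a parsed example using the row/column/value triplets.
# The number of nodes is taken from the first entry of 'size' (the
# feature-matrix shape), which matches the square adjacency matrix.
def parse_graph_example(serialized):
    spec = {
        'adj_row': tf.io.VarLenFeature(tf.int64),
        'adj_column': tf.io.VarLenFeature(tf.int64),
        'adj_values': tf.io.VarLenFeature(tf.float32),
        'size': tf.io.FixedLenFeature([2], tf.int64),
    }
    p = tf.io.parse_single_example(serialized, spec)
    row = tf.sparse.to_dense(p['adj_row'])
    col = tf.sparse.to_dense(p['adj_column'])
    vals = tf.sparse.to_dense(p['adj_values'])
    indices = tf.stack([row, col], axis=1)
    n = p['size'][0]
    return tf.scatter_nd(indices, vals, tf.stack([n, n]))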
def _create_int_feature(self, values):
    return Feature(int64_list=Int64List(value=list(values)))
def serialise(data):
    ID, pos, dimensions, color, border, fill, text, img, seq_len, seq_mask = \
        data['ID'], data['pos'], data['dimensions'], data['color'], \
        data['border'], data['fill'], data['text'], data['img'], \
        int(data['seq_len']), data['seq_mask']

    ID = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(ID).numpy()]))
    pos = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(pos, tf.float32)).numpy()]))
    dimensions = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(dimensions, tf.float32)).numpy()]))
    color = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(color, tf.float32)).numpy()]))
    border = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(border, tf.float32)).numpy()]))
    fill = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(fill, tf.float32)).numpy()]))
    text = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(text, tf.float32)).numpy()]))
    # img is already serialised because we never decode it!
    img = Feature(bytes_list=BytesList(value=[img.numpy()]))
    seq_len = Feature(int64_list=Int64List(value=[seq_len]))
    seq_mask = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(seq_mask).numpy()]))

    features = Features(feature={
        'ID': ID,
        'pos': pos,
        'dimensions': dimensions,
        'color': color,
        'border': border,
        'fill': fill,
        'text': text,
        'img': img,
        'seq_len': seq_len,
        'seq_mask': seq_mask,
    })
    example = Example(features=features)
    return example.SerializeToString()
def serialise(data):
    obs, acts, goals, seq_lens, masks, dataset_path, tstep_idxs, imgs, \
        goal_imgs, proprioceptive_features = \
        data['obs'], data['acts'], data['goals'], data['seq_lens'], \
        data['masks'], data['dataset_path'], data['tstep_idxs'], \
        data['imgs'], data['goal_imgs'], data['proprioceptive_features']
    # obs                      (1, 40, 18)
    # acts                     (1, 40, 7)
    # goals                    (1, 40, 11)
    # seq_lens                 (1,)
    # masks                    (1, 40)
    # dataset_path             (1, 40)
    # tstep_idxs               (1, 40)
    # imgs                     (1, 40, 200, 200, 3)
    # goal_imgs                (1, 40, 200, 200, 3)
    # proprioceptive_features  (1, 40, 7)

    # Create a (:, 1, :, :, :)-shaped goal image for less file IO.
    goal_imgs = tf.expand_dims(goal_imgs[:, 0, :, :, :], 1)

    obs = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.squeeze(obs)).numpy()]))
    acts = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.squeeze(acts)).numpy()]))
    goals = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.squeeze(goals)).numpy()]))
    seq_lens = Feature(int64_list=Int64List(value=[seq_lens]))
    masks = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.squeeze(masks)).numpy()]))
    imgs = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.squeeze(imgs)).numpy()]))
    goal_imgs = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.squeeze(goal_imgs)).numpy()]))
    proprioceptive_features = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(
            tf.squeeze(proprioceptive_features)).numpy()]))

    features = Features(
        feature={
            'obs': obs,
            'acts': acts,
            'goals': goals,
            'seq_lens': seq_lens,
            'masks': masks,
            'imgs': imgs,
            'goal_imgs': goal_imgs,
            'proprioceptive_features': proprioceptive_features
        })
    example = Example(features=features)
    return example.SerializeToString()

# Sample usage:
# r = lfp.data.PlayDataloader(include_imgs=args.images, batch_size=1,
#                             window_size=args.window_size_max,
#                             min_window_size=args.window_size_min)
# rd = r.extract(TRAIN_DATA_PATHS, from_tfrecords=args.from_tfrecords)
# rd = r.load(rd)
# r_it = iter(rd)
#
# @tf.function
# def sample():
#     return r_it.next()
#
# data_paths = [str(STORAGE_PATH/'precompute') + f"/{x}.tfrecords"
#               for x in range(0, 8)]
#
# # Write to GCS
# from tqdm import tqdm
# for path in data_paths:
#     with tf.io.TFRecordWriter(path) as file_writer:
#         print(path)
#         for i in tqdm(range(0, 200)):
#             byte_stream = serialise(sample())
#             file_writer.write(byte_stream)
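# Hedged read-side counterpart (assumed, not from the source): parse one
# serialized trajectory back into tensors plus the scalar seq_lens. All
# dtypes are assumed float32 here; the image tensors may need a different
# out_type depending on how they were stored.
TRAJ_KEYS = ['obs', 'acts', 'goals', 'masks', 'imgs', 'goal_imgs',
             'proprioceptive_features']


def deserialise(serialized):
    spec = {k: tf.io.FixedLenFeature([], tf.string) for k in TRAJ_KEYS}
    spec['seq_lens'] = tf.io.FixedLenFeature([], tf.int64)
    parsed = tf.io.parse_single_example(serialized, spec)
    out = {k: tf.io.parse_tensor(parsed[k], out_type=tf.float32)
           for k in TRAJ_KEYS}
    out['seq_lens'] = parsed['seq_lens']
    return out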
def write_to_tfrecords(adj, feature, label_data, label_mask, tfrname):
    """Writes graph related data to disk."""
    adj_row, adj_col = np.nonzero(adj)
    adj_values = adj[adj_row, adj_col]
    adj_elem_len = len(adj_row)
    feature = np.array(feature)
    feature_row, feature_col = np.nonzero(feature)
    feature_values = feature[feature_row, feature_col]
    feature_elem_len = len(feature_row)
    features = Features(
        feature={
            'label': Feature(int64_list=Int64List(value=label_data)),
            'mask_label': Feature(int64_list=Int64List(value=label_mask)),
            'adj_row': Feature(int64_list=Int64List(value=list(adj_row))),
            'adj_column': Feature(int64_list=Int64List(value=list(adj_col))),
            'adj_values': Feature(float_list=FloatList(value=list(adj_values))),
            'adj_elem_len': Feature(int64_list=Int64List(value=[adj_elem_len])),
            'feature_row': Feature(int64_list=Int64List(value=list(feature_row))),
            'feature_column': Feature(int64_list=Int64List(value=list(feature_col))),
            'feature_values': Feature(float_list=FloatList(value=list(feature_values))),
            'feature_elem_len': Feature(int64_list=Int64List(value=[feature_elem_len])),
            'size': Feature(int64_list=Int64List(value=list(feature.shape)))
        })
    ex = Example(features=features)
    with TFRecordWriter(tfrname) as single_writer:
        single_writer.write(ex.SerializeToString())