Пример #1
0
    def rollout_to_protobuf(document_words, document_tags):
        """Tokenize one document and write it to ``writer`` as a serialized Example.

        ``document_words`` and ``document_tags`` are each wrapped in a
        single-element list before being passed to ``tokenize``. The inputs and
        the tokenized text are echoed to stdout along the way.
        """
        for caption, payload in (("document_words", document_words),
                                 ("document_tags", document_tags)):
            print(caption)
            print(payload)

        input_ids, input_masks, y_masks, text, ys = tokenize(
            [document_words], [document_tags])

        print("text")
        print(text)

        def int_feature(values):
            # All integer outputs of ``tokenize`` are stored the same way.
            return Feature(int64_list=tf.train.Int64List(value=values))

        # Only the first entry of ``text`` is stored, one UTF-8 value per token.
        encoded_tokens = [token.encode('utf-8') for token in text[0]]
        record_fields = {
            'text': Feature(bytes_list=tf.train.BytesList(value=encoded_tokens)),
            'input_ids': int_feature(input_ids),
            'input_masks': int_feature(input_masks),
            'y_masks': int_feature(y_masks),
            'labels': int_feature(ys),
        }
        example_proto = Example(features=Features(feature=record_fields))
        writer.write(example_proto.SerializeToString())
Пример #2
0
def make_tf_examples(string_features, int_features, labels):
    """Build a list of ``Example`` protos from string, integer and label data.

    Args:
        string_features: iterable of byte-string lists; each one becomes a
            ``BytesList`` feature.
        int_features: iterable of integer lists; each one becomes an
            ``Int64List`` feature. The caller's object is NOT modified.
        labels: raw labels; they are passed through ``zero_norm_labels`` and
            each result is stored as a single-element integer feature.

    Returns:
        One ``Example`` per tuple produced by zipping the seven groups that
        ``split_list(all_features, parts=7)`` returns (presumably seven
        equally sized consecutive groups: left, target, right, left_ids,
        target_ids, right_ids, labels -- confirm against ``split_list``).
    """
    # Build a new list rather than using ``+=``: the augmented assignment
    # would extend the list object owned by the caller, so reusing that list
    # for a second call would silently carry over the previous labels.
    int_values = list(int_features) + [
        [label] for label in zero_norm_labels(labels)
    ]
    string_features = [
        Feature(bytes_list=BytesList(value=val)) for val in string_features
    ]
    int_values = [
        Feature(int64_list=Int64List(value=val)) for val in int_values
    ]
    all_features = string_features + int_values
    return [
        Example(features=Features(
            feature={
                "left": left,
                "target": target,
                "right": right,
                "left_ids": left_ids,
                "target_ids": target_ids,
                "right_ids": right_ids,
                "labels": label,
            })) for (
                left,
                target,
                right,
                left_ids,
                target_ids,
                right_ids,
                label,
            ) in zip(*split_list(all_features, parts=7))
    ]
Пример #3
0
def create_examples(data,
                    bert_client,
                    training=True,
                    label2int=None,
                    class_weight=None):
    """
    Yield one ``Example`` per row of ``data``.

    data: pd.DataFrame with columns 'title1_en' and 'title2_en', plus
        'label' when ``training`` is true or 'id' otherwise.
    bert_client: object whose ``encode`` method turns a list of strings into
        one float vector per string.
    training: when true, each example carries 'label' and 'class_weight';
        otherwise it carries 'id'.
    label2int: dict mapping the raw label value to an integer class
        (required when ``training`` is true).
    class_weight: sequence indexed by the integer class
        (required when ``training`` is true).

    yield examples
    """
    A_encoded = bert_client.encode(data['title1_en'].tolist())
    B_encoded = bert_client.encode(data['title2_en'].tolist())

    # Read the per-row column by position. The encodings above are positional
    # (built from ``.tolist()``), so label-based lookups such as
    # ``data.loc[first_index + i]`` only line up when the index happens to be
    # a contiguous integer range; they break after filtering or shuffling.
    row_values = data['label' if training else 'id'].tolist()

    for i, row_value in enumerate(row_values):
        feature = {
            'A_encoded': Feature(float_list=FloatList(value=A_encoded[i])),
            'B_encoded': Feature(float_list=FloatList(value=B_encoded[i]))
        }
        if training:
            label = label2int[row_value]
            feature['label'] = Feature(int64_list=Int64List(value=[label]))
            feature['class_weight'] = Feature(float_list=FloatList(
                value=[class_weight[label]]))
        else:
            feature['id'] = Feature(int64_list=Int64List(value=[row_value]))

        yield Example(features=Features(feature=feature))
Пример #4
0
def serialise_traj(data):
    """Serialize a dict of tensors into the bytes of a ``tf.train.Example``.

    Every entry except 'seq_lens' is stored as a bytes feature holding the
    output of ``tf.io.serialize_tensor``; 'seq_lens' is stored as a
    single-element int64 feature.
    """
    def _tensor_feature(tensor):
        raw = tf.io.serialize_tensor(tensor).numpy()
        return Feature(bytes_list=BytesList(value=[raw]))

    features = {}
    for key, tensor in data.items():
        if key == 'seq_lens':
            continue  # stored as an integer feature below
        features[key] = _tensor_feature(tensor)

    features['seq_lens'] = Feature(
        int64_list=Int64List(value=[data['seq_lens']]))

    return Example(features=Features(feature=features)).SerializeToString()
Пример #5
0
def serialise_vid(data):
    """Serialize a dict of tensors into the bytes of a ``tf.train.Example``.

    Every entry of ``data`` except 'seq_lens' is stored as a bytes feature
    holding the output of ``tf.io.serialize_tensor``; 'seq_lens' is stored as
    a single-element int64 feature.

    Args:
        data: mapping from feature name to tensor; must contain 'seq_lens'.

    Returns:
        The serialized ``Example`` as bytes.
    """
    features = {
        k: Feature(bytes_list=BytesList(
            value=[tf.io.serialize_tensor(v).numpy()]))
        for k, v in data.items() if k != 'seq_lens'
    }
    features['seq_lens'] = Feature(
        int64_list=Int64List(value=[data['seq_lens']]))

    # Protobuf message constructors accept keyword arguments only, so the
    # feature map must be passed as ``feature=``; passing it positionally
    # raises TypeError.
    example = Example(features=Features(feature=features))

    return example.SerializeToString()
Пример #6
0
def save(mask, img, save_dir, count=0):
    """JPEG-encode an image/mask pair and write them as one TFRecord file.

    The record is written to ``<save_dir>/Data-<count>.tfrecord`` and holds a
    single Example with the bytes features "Image" and "Mask".
    """
    encoded_mask = tf.io.encode_jpeg(mask)
    encoded_img = tf.io.encode_jpeg(img)

    payload = {
        "Image": Feature(bytes_list=BytesList(value=[encoded_img.numpy()])),
        "Mask": Feature(bytes_list=BytesList(value=[encoded_mask.numpy()])),
    }
    record = Example(features=Features(feature=payload))

    target_path = os.path.join(save_dir, f"Data-{count}.tfrecord")
    with tf.io.TFRecordWriter(target_path) as record_writer:
        record_writer.write(record.SerializeToString())
Пример #7
0
 def create_example(features: np.ndarray, label: np.int32):
     """Return the serialized Example holding ``features`` and ``label``.

     ``features`` is serialized with ``tf.io.serialize_tensor`` and stored
     through ``NumpyToRecordConverter._bytes_feature``; ``label`` is stored
     as a single-element int64 feature.
     """
     serialized_features = tf.io.serialize_tensor(features)
     record_fields = {
         "features":
         NumpyToRecordConverter._bytes_feature(serialized_features),
         "label":
         Feature(int64_list=Int64List(value=[label])),
     }
     record = Example(features=Features(feature=record_fields))
     return record.SerializeToString()
Пример #8
0
    def __encode_input(self, mr, input_encoder):
        """Encode ``mr`` and return it as a serialized TF Example.

        The ids produced by ``input_encoder.encode`` get
        ``text_encoder.EOS_ID`` appended and are stored under 'inputs'.
        """
        token_ids = input_encoder.encode(mr)
        token_ids.append(text_encoder.EOS_ID)

        inputs_feature = Feature(int64_list=Int64List(value=token_ids))
        record = Example(features=Features(feature={'inputs': inputs_feature}))

        return record.SerializeToString()
Пример #9
0
def get_cycle_example(cell_value, summary_idx, cycle_idx, scaling_factors):
    """
    Build the "Example" object for one charging cycle.

    Summary values are read from cell_value["summary"] at "summary_idx" and
    detail arrays from cell_value["cycles"][cycle_idx]. Every value is divided
    by the matching entry of "scaling_factors". The current cycle number is
    divided by the remaining-cycles factor because both share the same scale.
    """
    def scaled_summary(name):
        # Scalars must be wrapped in a list to be stored in a FloatList.
        return [
            cell_value["summary"][name][summary_idx] / scaling_factors[name]
        ]

    def scaled_detail(name):
        return cell_value["cycles"][cycle_idx][name] / scaling_factors[name]

    def float_feature(values):
        return Feature(float_list=FloatList(value=values))

    # Summary features (scalars)
    internal_resistance = scaled_summary(cst.INTERNAL_RESISTANCE_NAME)
    discharge_capacity = scaled_summary(cst.QD_NAME)
    remaining_cycles = scaled_summary(cst.REMAINING_CYCLES_NAME)
    discharge_time = scaled_summary(cst.DISCHARGE_TIME_NAME)
    current_cycle = [
        float(cycle_idx) / scaling_factors[cst.REMAINING_CYCLES_NAME]
    ]

    # Detail features (arrays)
    qdlin = scaled_detail(cst.QDLIN_NAME)
    tdlin = scaled_detail(cst.TDLIN_NAME)

    record_fields = {
        cst.INTERNAL_RESISTANCE_NAME: float_feature(internal_resistance),
        cst.QD_NAME: float_feature(discharge_capacity),
        cst.REMAINING_CYCLES_NAME: float_feature(remaining_cycles),
        cst.DISCHARGE_TIME_NAME: float_feature(discharge_time),
        cst.QDLIN_NAME: float_feature(qdlin),
        cst.TDLIN_NAME: float_feature(tdlin),
        cst.CURRENT_CYCLE_NAME: float_feature(current_cycle),
    }
    return Example(features=Features(feature=record_fields))
Пример #10
0
def main():
    """Send sentence/target pairs to the prediction service and print results.

    Inputs come from ``parse_args``: either a CSV batch file whose columns are
    ``target, sentence`` (no header row), or a single sentence/target pair.
    Each pair is split into left context, target and right context, encoded,
    and wrapped in an ``Example``. All examples are then sent to the gRPC
    service at 127.0.0.1:8500 twice: once as a classification request and
    once as a prediction request. Both responses are printed.
    """
    model, signature, batch_file_path, sentence, target = parse_args()

    feat_dict = {"sentences": [], "targets": []}

    if batch_file_path is not None:
        # The csv module asks for files opened with newline="" so that
        # newlines embedded in quoted fields are handled correctly.
        with open(batch_file_path, "r", newline="") as batch_file:
            fieldnames = ["target", "sentence"]
            csvreader = DictReader(batch_file, fieldnames=fieldnames)
            for row in csvreader:
                feat_dict["targets"].append(row["target"].strip())
                feat_dict["sentences"].append(row["sentence"].strip())
    else:
        feat_dict["targets"].append(target)
        feat_dict["sentences"].append(sentence)

    l_ctxts, trgs, r_ctxts = FeatureProvider.partition_sentences(
        sentences=feat_dict["sentences"],
        targets=feat_dict["targets"],
        offsets=FeatureProvider.get_target_offset_array(feat_dict),
    )
    l_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(l_ctxts)
    ]
    trg_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(trgs)
    ]
    r_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(r_ctxts)
    ]

    # Keep the Example protos themselves: the classification request needs
    # message objects, while the prediction request needs serialized bytes.
    # The loop variable is named ``trg`` so it does not shadow the parsed
    # ``target`` argument.
    example_protos = [
        Example(features=Features(
            feature={
                "left": Feature(bytes_list=BytesList(value=left)),
                "target": Feature(bytes_list=BytesList(value=trg)),
                "right": Feature(bytes_list=BytesList(value=right)),
            }
        ))
        for left, trg, right in zip(l_enc, trg_enc, r_enc)
    ]
    tf_examples = [proto.SerializeToString() for proto in example_protos]

    tensor_proto = make_tensor_proto(
        tf_examples, dtype=tf_string, shape=[len(tf_examples)]
    )

    # The channel is a context manager; closing it releases the connection.
    with insecure_channel("127.0.0.1:8500") as channel:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

        # CLASSIFICATION
        # Send every example, not just the last one left over from a loop.
        # NOTE(review): the model name is hard-coded to "lg" here while the
        # prediction request below uses ``model`` -- confirm this is intended.
        classification_req = ClassificationRequest()
        inputs = Input(example_list=ExampleList(examples=example_protos))
        classification_req.input.CopyFrom(inputs)  # pylint: disable=E1101
        classification_req.model_spec.name = "lg"  # pylint: disable=E1101
        classification = stub.Classify(classification_req, 60.0)
        print(classification)

        # PREDICTION
        prediction_req = PredictRequest()
        prediction_req.inputs["instances"].CopyFrom(  # pylint: disable=E1101
            tensor_proto
        )
        prediction_req.model_spec.signature_name = (  # pylint: disable=E1101
            signature
        )
        prediction_req.model_spec.name = model  # pylint: disable=E1101
        prediction = stub.Predict(prediction_req, 60.0)
        print(prediction)
Пример #11
0
 def _bytes_feature(value):
     """Wrap a single string / bytes value in a bytes_list ``Feature``."""
     eager_tensor_type = type(tf.constant(0))
     if isinstance(value, eager_tensor_type):
         # BytesList won't unpack a string from an EagerTensor, so take the
         # underlying value first.
         value = value.numpy()
     wrapped = BytesList(value=[value])
     return Feature(bytes_list=wrapped)
Пример #12
0
def convert_to_example(
    adj,
    feature,
    label_data=None,
    label_mask=None,
):
    """
    Serialize graph related data into the bytes of a ``tf.train.Example``.

    Nothing is written to disk; the caller receives the serialized record.

    Args:
        adj: 2-D numpy array (adjacency matrix). Only its non-zero entries
            are stored, as row indices, column indices and values.
        feature: 2-D array-like feature matrix, stored the same sparse way
            together with its dense shape under 'size'.
        label_data: optional numpy array of labels. NaNs are replaced via
            ``np.nan_to_num`` and the result is cast to int.
        label_mask: numpy array cast to int and stored under 'mask_label'.
            Must be provided whenever ``label_data`` is.

    Returns:
        The serialized ``Example`` as bytes.
    """
    adj_row, adj_col = np.nonzero(adj)
    adj_values = adj[adj_row, adj_col]
    adj_elem_len = len(adj_row)
    # Per-column sums of ``adj``; looked up by the ROW index of each entry
    # below. Diagonal entries get 0.
    degrees = np.sum(adj, 0)
    adj_degrees = [
        0 if ar == ac else int(degrees[ar])
        for ar, ac in zip(adj_row, adj_col)
    ]
    feature_matrix = np.array(feature)
    feature_row, feature_col = np.nonzero(feature_matrix)
    feature_values = feature_matrix[feature_row, feature_col]
    feature_elem_len = len(feature_row)
    feature_map = {
        'adj_row': Feature(int64_list=Int64List(value=list(adj_row))),
        'adj_column': Feature(int64_list=Int64List(value=list(adj_col))),
        'adj_values': Feature(float_list=FloatList(value=list(adj_values))),
        'adj_elem_len': Feature(int64_list=Int64List(value=[adj_elem_len])),
        'adj_degrees': Feature(int64_list=Int64List(value=adj_degrees)),
        'feature_row': Feature(int64_list=Int64List(value=list(feature_row))),
        'feature_column':
        Feature(int64_list=Int64List(value=list(feature_col))),
        'feature_values':
        Feature(float_list=FloatList(value=list(feature_values))),
        'feature_elem_len':
        Feature(int64_list=Int64List(value=[feature_elem_len])),
        'size':
        Feature(int64_list=Int64List(value=list(feature_matrix.shape))),
    }
    if label_data is not None:
        label_data = np.nan_to_num(label_data)
        feature_map['label'] = Feature(int64_list=Int64List(
            value=label_data.astype(int)))
        # No trailing comma here: a comma would store a 1-tuple instead of a
        # Feature and make the Features constructor below raise TypeError.
        feature_map['mask_label'] = Feature(int64_list=Int64List(
            value=label_mask.astype(int)))
    features = Features(feature=feature_map)
    ex = Example(features=features)
    return ex.SerializeToString()
Пример #13
0
 def _create_int_feature(self, values):
     """Return an int64 ``Feature`` holding ``values`` copied into a list."""
     int_values = Int64List(value=list(values))
     return Feature(int64_list=int_values)
Пример #14
0
def serialise(data):
    """Serialize one record of ``data`` into the bytes of a ``tf.train.Example``.

    'ID' and 'seq_mask' are stored as serialized tensors. 'pos', 'dimensions',
    'color', 'border', 'fill' and 'text' are cast to float32 and then stored
    as serialized tensors. 'img' is stored through its ``.numpy()`` value
    without tensor serialization, and 'seq_len' is stored as one integer.
    """
    def tensor_feature(tensor):
        raw = tf.io.serialize_tensor(tensor).numpy()
        return Feature(bytes_list=BytesList(value=[raw]))

    float_field_names = ('pos', 'dimensions', 'color', 'border', 'fill', 'text')

    # Read every input first so a missing key fails before any serialization.
    identifier = data['ID']
    float_fields = {name: data[name] for name in float_field_names}
    image = data['img']
    length = int(data['seq_len'])
    mask = data['seq_mask']

    record = {'ID': tensor_feature(identifier)}
    for name, tensor in float_fields.items():
        record[name] = tensor_feature(tf.cast(tensor, tf.float32))
    # img is already serialised because it is never decoded upstream.
    record['img'] = Feature(bytes_list=BytesList(value=[image.numpy()]))
    record['seq_len'] = Feature(int64_list=Int64List(value=[length]))
    record['seq_mask'] = tensor_feature(mask)

    return Example(features=Features(feature=record)).SerializeToString()
Пример #15
0
def serialise(data):
    """Serialize one sampled batch into the bytes of a ``tf.train.Example``.

    Shapes noted by the original author for a batch of one, window of 40:
        obs (1, 40, 18), acts (1, 40, 7), goals (1, 40, 11), seq_lens (1,),
        masks (1, 40), dataset_path (1, 40), tstep_idxs (1, 40),
        imgs (1, 40, 200, 200, 3), goal_imgs (1, 40, 200, 200, 3),
        proprioceptive_features (1, 40, 7)

    'dataset_path' and 'tstep_idxs' are read from ``data`` but are not
    written to the record. 'seq_lens' is stored as an int64 feature; every
    other stored field is squeezed and stored as a serialized tensor.
    """
    input_names = ('obs', 'acts', 'goals', 'seq_lens', 'masks',
                   'dataset_path', 'tstep_idxs', 'imgs', 'goal_imgs',
                   'proprioceptive_features')
    fields = {name: data[name] for name in input_names}

    # Keep only the goal image at time index 0, with the time axis restored
    # as length 1, so less data has to be written.
    fields['goal_imgs'] = tf.expand_dims(
        fields['goal_imgs'][:, 0, :, :, :], 1)

    def squeezed_tensor_feature(tensor):
        raw = tf.io.serialize_tensor(tf.squeeze(tensor)).numpy()
        return Feature(bytes_list=BytesList(value=[raw]))

    record = {}
    for name in ('obs', 'acts', 'goals'):
        record[name] = squeezed_tensor_feature(fields[name])
    record['seq_lens'] = Feature(
        int64_list=Int64List(value=[fields['seq_lens']]))
    for name in ('masks', 'imgs', 'goal_imgs', 'proprioceptive_features'):
        record[name] = squeezed_tensor_feature(fields[name])

    return Example(features=Features(feature=record)).SerializeToString()


# Sample Usage
# r = lfp.data.PlayDataloader(include_imgs = args.images, batch_size=1,  window_size=args.window_size_max, min_window_size=args.window_size_min)
# rd = r.extract(TRAIN_DATA_PATHS, from_tfrecords=args.from_tfrecords)
# rd = r.load(rd)
# r_it = iter(rd)

# @tf.function
# def sample():
#   return r_it.next()

# data_paths = [str(STORAGE_PATH/'precompute')+f"/{x}.tfrecords" for x in range(0,8)]
# #@title write to gcs
# from tqdm import tqdm
# for path in data_paths:
#   with tf.io.TFRecordWriter(path) as file_writer:
#     print(path)
#     for i in tqdm(range(0,200)):
#         byte_stream = serialise(sample())
#         file_writer.write(byte_stream)
Пример #16
0
def write_to_tfrecords(adj, feature, label_data, label_mask, tfrname):
    """
    Write graph related data to ``tfrname`` as a single TFRecord entry.

    The non-zero entries of ``adj`` and ``feature`` are stored as row
    indices, column indices and values. The dense shape of ``feature`` is
    stored under 'size', and the labels and label mask are stored as given.
    """
    def int_feature(values):
        return Feature(int64_list=Int64List(value=values))

    def float_feature(values):
        return Feature(float_list=FloatList(value=values))

    adj_row, adj_col = np.nonzero(adj)
    adj_values = adj[adj_row, adj_col]
    adj_count = len(adj_row)

    dense_feature = np.array(feature)
    feature_row, feature_col = np.nonzero(dense_feature)
    feature_values = dense_feature[feature_row, feature_col]
    feature_count = len(feature_row)

    record_fields = {
        'label': int_feature(label_data),
        'mask_label': int_feature(label_mask),
        'adj_row': int_feature(list(adj_row)),
        'adj_column': int_feature(list(adj_col)),
        'adj_values': float_feature(list(adj_values)),
        'adj_elem_len': int_feature([adj_count]),
        'feature_row': int_feature(list(feature_row)),
        'feature_column': int_feature(list(feature_col)),
        'feature_values': float_feature(list(feature_values)),
        'feature_elem_len': int_feature([feature_count]),
        'size': int_feature(list(dense_feature.shape)),
    }
    serialized = Example(
        features=Features(feature=record_fields)).SerializeToString()
    with TFRecordWriter(tfrname) as single_writer:
        single_writer.write(serialized)