示例#1
0
def get_example_proto(row, features):
    """
    Build an Example protobuf out of a single dataframe row

    Args:
        - row: pandas DataFrame row
        - features: iterable of feature configurations; each entry is read
                    for its "name" and "dtype" keys, and for "default_value"
                    when the row holds a missing value for that feature
    """

    proto_features = {}
    for spec in features:
        column = spec["name"]
        to_feature = _get_feature_fn(spec["dtype"])
        # FIXME
        # Row-wise apply (axis=1) makes pandas upcast the row, so a mix of
        # float and int columns arrives here as all floats, which breaks
        # this conversion. Example:
        # https://stackoverflow.com/questions/47143631/
        # how-do-i-preserve-datatype-when-using-apply-row-wise-in-pandas-dataframe
        value = row[column]
        if pd.isna(value):
            # Missing cell: fall back to the configured default
            value = spec["default_value"]
        proto_features[column] = to_feature([value])
    return train.Example(features=train.Features(feature=proto_features))
示例#2
0
def get_sequence_example_proto(group, context_features, sequence_features):
    """
    Build a SequenceExample protobuf out of a dataframe group

    Context features take the value from the first row of the group.
    Sequence features pack all of the group's values for a column into a
    single Feature, wrapped in a FeatureList.

    Args:
        - group: pandas dataframe group
        - context_features: feature configuration for context
        - sequence_features: feature configuration for sequence
    """
    context = {}
    for spec in context_features:
        column = spec["name"]
        to_feature = _get_feature_fn(spec["dtype"])
        first_value = group[column].tolist()[0]
        context[column] = to_feature([first_value])

    feature_lists = {}
    for spec in sequence_features:
        column = spec["name"]
        to_feature = _get_feature_fn(spec["dtype"])
        # Entries with any other tfrecord_type are skipped
        if spec["tfrecord_type"] == SequenceExampleTypeKey.SEQUENCE:
            column_values = group[column].tolist()
            feature_lists[column] = train.FeatureList(
                feature=[to_feature(column_values)]
            )

    context_proto = train.Features(feature=context)
    feature_lists_proto = train.FeatureLists(feature_list=feature_lists)
    return train.SequenceExample(
        context=context_proto,
        feature_lists=feature_lists_proto,
    )
示例#3
0
def _get_sequence_example_proto(group, feature_config: FeatureConfig):
    """
    Build a SequenceExample protobuf out of a dataframe group

    Context features take the value from the first row of the group.
    Sequence features pack all of the group's values for a column into a
    single Feature, wrapped in a FeatureList.

    Args:
        - group: pandas dataframe group
        - feature_config: source of the feature configurations, read through
                          get_context_features() and get_sequence_features()
    """
    context = {}
    for spec in feature_config.get_context_features():
        column = spec["name"]
        to_feature = _get_feature_fn(spec["dtype"])
        first_value = group[column].tolist()[0]
        context[column] = to_feature([first_value])

    sequences = {}
    for spec in feature_config.get_sequence_features():
        column = spec["name"]
        to_feature = _get_feature_fn(spec["dtype"])
        # Entries with any other tfrecord_type are skipped
        if spec["tfrecord_type"] == TFRecordTypeKey.SEQUENCE:
            column_values = group[column].tolist()
            sequences[column] = train.FeatureList(
                feature=[to_feature(column_values)])

    return train.SequenceExample(
        context=train.Features(feature=context),
        feature_lists=train.FeatureLists(feature_list=sequences),
    )
示例#4
0
def process_folder(folder_path, folder_csv, img_label, test_writer,
                   train_writer):
    """
    Serialize every usable '.jpg' in a folder as an Example and write it to
    either the training or the test writer.

    Args:
        - folder_path: directory whose '.jpg' files are processed
        - folder_csv: mapping from image file name to the per-image data
                      handed to load_img; files without an entry are
                      reported on stdout and skipped
        - img_label: label stored with every image of this folder
        - test_writer: object with a write(bytes) method for test examples
        - train_writer: object with a write(bytes) method for train examples

    Returns:
        Tuple (ex_test, ex_train) with the number of examples written to
        the test and to the training writer respectively.
    """
    ex_test, ex_train = 0, 0
    for img_name in listdir(folder_path):
        if not img_name.endswith('.jpg'):
            continue

        img_data = folder_csv.get(img_name)
        if img_data is None:
            print('Unable to retrieve data for {}'.format(img_name))
            continue

        img = load_img(path.join(folder_path, img_name), img_data)
        if img is None:
            # load_img returned nothing for this file; skip it silently
            continue

        # NOTE(review): img is presumably a NumPy array - confirm against
        # load_img. Only 300x300 3-channel images are accepted.
        assert img.shape == (300, 300, 3)
        feature = {
            'label': int64_feature(img_label),
            # tobytes() replaces tostring(), a deprecated alias that
            # recent NumPy releases no longer provide.
            'image': bytes_feature(img.tobytes())
        }
        example = train.Example(features=train.Features(feature=feature))

        if put_in_training_set():
            train_writer.write(example.SerializeToString())
            ex_train += 1
        else:
            test_writer.write(example.SerializeToString())
            ex_test += 1
    return ex_test, ex_train
示例#5
0
def create_example(image_path):
    """
    Build an Example holding the raw bytes of an image file and its label.

    The label is derived by _get_tf_class from the directory part of
    image_path.

    Args:
        - image_path: path of the image file, as a str

    Returns:
        Example with the features 'image/label' and 'image/encoded'.

    Raises:
        AssertionError: if image_path is not a str
    """
    # isinstance(..., str) replaces the types.StringType comparison, which
    # only exists on Python 2 and raises AttributeError on Python 3.
    assert isinstance(image_path, str), 'image_path: passed object of incorrect type'

    # Context manager so the file handle is closed deterministically
    with open(image_path, 'rb') as image_file:
        image_data = image_file.read()
    # The class name in the second slot is not needed here
    class_label, _ = _get_tf_class(os.path.split(image_path)[0])

    return tft.Example(features=tft.Features(feature={
        'image/label': _int64_feature(class_label),
        'image/encoded': _bytes_feature(tfc.as_bytes(image_data)),
    }))
示例#6
0
    def record_writer(self, label, name, writer):
        """ Write a specific example to a tensor record writer

        The image is loaded with self.shape as target size, converted to a
        uint8 array and stored as raw bytes together with its label.

        :param label:  image label
        :param name:  image filename
        :param writer: tensor record object exposing write(bytes)
        :return: None
        """
        image = load_img(name, target_size=self.shape)
        pixels = img_to_array(image, dtype='uint8')
        feature = dict(
            # tobytes() replaces tostring(), a deprecated alias that
            # recent NumPy releases no longer provide.
            image=_bytes_feature(pixels.tobytes()),
            label=_int64_feature(label),
        )
        features = train.Features(feature=feature)
        sample = train.Example(features=features)
        writer.write(sample.SerializeToString())
示例#7
0
def get_example_proto(row, features):
    """
    Build an Example protobuf out of a single dataframe row

    Args:
        - row: pandas DataFrame row
        - features: iterable of feature configurations; each entry is read
                    for its "name" and "dtype" keys
    """

    def _named_feature(spec):
        # Returns the (feature name, Feature proto) pair for one config entry
        column = spec["name"]
        to_feature = _get_feature_fn(spec["dtype"])
        return column, to_feature([row[column]])

    proto_features = dict(_named_feature(spec) for spec in features)
    return train.Example(features=train.Features(feature=proto_features))
示例#8
0
def get_sequence_example_proto(group, context_features, sequence_features):
    """
    Build a SequenceExample protobuf out of a dataframe group

    Context features are taken from the first row of the group. Sequence
    features produce one Feature per row of the group. In both cases a value
    that is already a list is used as is, anything else is wrapped in a
    one-element list.

    Parameters
    ----------
    group : pandas dataframe group
    context_features : dict
        dictionary containing the configuration for all the context features
    sequence_features : dict
        dictionary containing the configuration for all the sequence features

    Returns
    -------
    `SequenceExample` object
        SequenceExample object built from the dataframe group
    """
    context = {}
    for spec in context_features:
        column = spec["name"]
        to_feature = _get_feature_fn(spec["dtype"])
        first_value = group[column].tolist()[0]
        if not isinstance(first_value, list):
            first_value = [first_value]
        context[column] = to_feature(first_value)

    feature_lists = {}
    for spec in sequence_features:
        column = spec["name"]
        to_feature = _get_feature_fn(spec["dtype"])
        # Entries with any other tfrecord_type are skipped
        if spec["tfrecord_type"] == SequenceExampleTypeKey.SEQUENCE:
            steps = []
            for step_value in group[column].tolist():
                if isinstance(step_value, list):
                    steps.append(to_feature(step_value))
                else:
                    steps.append(to_feature([step_value]))
            feature_lists[column] = train.FeatureList(feature=steps)

    context_proto = train.Features(feature=context)
    feature_lists_proto = train.FeatureLists(feature_list=feature_lists)
    return train.SequenceExample(
        context=context_proto,
        feature_lists=feature_lists_proto,
    )
示例#9
0
def get_example_proto(row, features):
    """
    Get an Example protobuf from a pandas dataframe row

    A missing scalar cell is replaced by the feature's "default_value".
    List-valued cells are passed to the feature function unchanged; any
    other value is wrapped in a one-element list.

    Parameters
    ----------
    row : pandas DataFrame row
        pandas dataframe row to be converted to an example proto
    features : dict
        dictionary containing configuration for all features

    Returns
    -------
    `Example` protobuffer object
        Example object loaded from the specified row

    Raises
    ------
    Exception
        If a configured feature name is not present in the row
    """

    features_dict = dict()
    for feature_info in features:
        feature_name = feature_info["name"]
        feature_fn = _get_feature_fn(feature_info["dtype"])
        # FIXME
        # When applying functions with axis=1, pandas performs upcasting,
        # so if we have a mix of floats/ints, converts everything to float
        # that breaks this part of the code. Example:
        # https://stackoverflow.com/questions/47143631/
        # how-do-i-preserve-datatype-when-using-apply-row-wise-in-pandas-dataframe
        if feature_name not in row:
            raise Exception(
                "Could not find column {} in record: {}".format(feature_name, str(row))
            )
        feature_val = row[feature_name]
        # pd.isna() on a list answers element-wise with an array, whose
        # truth value is ambiguous (ValueError) for more than one element.
        # Only non-list cells are therefore checked for missingness.
        if not isinstance(feature_val, list) and pd.isna(feature_val):
            feature_val = feature_info["default_value"]
        features_dict[feature_name] = feature_fn(
            feature_val if isinstance(feature_val, list) else [feature_val]
        )

    return train.Example(features=train.Features(feature=features_dict))