示例#1
0
def create_examples(data,
                    bert_client,
                    training=True,
                    label2int=None,
                    class_weight=None):
    """
    data: pd.DataFrame
    label2int: dict
    class_weight: list

    yield examples
    """
    idx_start = data.index[0]

    A_encoded = bert_client.encode(data['title1_en'].tolist())
    B_encoded = bert_client.encode(data['title2_en'].tolist())

    for i in range(len(data)):
        feature = {
            'A_encoded': Feature(float_list=FloatList(value=A_encoded[i])),
            'B_encoded': Feature(float_list=FloatList(value=B_encoded[i]))
        }
        if training:
            label = label2int[data.loc[idx_start + i, 'label']]
            feature['label'] = Feature(int64_list=Int64List(value=[label]))
            feature['class_weight'] = Feature(float_list=FloatList(
                value=[class_weight[label]]))
        else:
            feature['id'] = Feature(int64_list=Int64List(
                value=[data.loc[idx_start + i, 'id']]))

        yield Example(features=Features(feature=feature))
示例#2
0
文件: utils.py 项目: celerinoue/kGCN
def convert_to_example(
    adj,
    feature,
    label_data=None,
    label_mask=None,
):
    """
    Writes graph related data to disk.
    """
    adj_row, adj_col = np.nonzero(adj)
    adj_values = adj[adj_row, adj_col]
    adj_elem_len = len(adj_row)
    degrees = np.sum(adj, 0)
    adj_degrees = []
    for ar, ac in zip(adj_row, adj_col):
        if ar == ac:
            adj_degrees.append(0)
        else:
            adj_degrees.append(int(degrees[ar]))
    feature = np.array(feature)
    feature_row, feature_col = np.nonzero(feature)
    feature_values = feature[feature_row, feature_col]
    feature_elem_len = len(feature_row)
    feature = {
        'adj_row': Feature(int64_list=Int64List(value=list(adj_row))),
        'adj_column': Feature(int64_list=Int64List(value=list(adj_col))),
        'adj_values': Feature(float_list=FloatList(value=list(adj_values))),
        'adj_elem_len': Feature(int64_list=Int64List(value=[adj_elem_len])),
        'adj_degrees': Feature(int64_list=Int64List(value=adj_degrees)),
        'feature_row': Feature(int64_list=Int64List(value=list(feature_row))),
        'feature_column':
        Feature(int64_list=Int64List(value=list(feature_col))),
        'feature_values':
        Feature(float_list=FloatList(value=list(feature_values))),
        'feature_elem_len':
        Feature(int64_list=Int64List(value=[feature_elem_len])),
        'size': Feature(int64_list=Int64List(value=list(feature.shape)))
    }
    if label_data is not None:
        label_data = np.nan_to_num(label_data)
        feature['label'] = Feature(int64_list=Int64List(
            value=label_data.astype(int)))
        feature['mask_label'] = Feature(int64_list=Int64List(
            value=label_mask.astype(int))),
    features = Features(feature=feature)
    ex = Example(features=features)
    return ex.SerializeToString()
示例#3
0
文件: prep.py 项目: wibrow/kGCN
def write_to_tfrecords(adj, feature, label_data, label_mask, tfrname):
    """
    Writes graph related data to disk.
    """
    adj_row, adj_col = np.nonzero(adj)
    adj_values = adj[adj_row, adj_col]
    adj_elem_len = len(adj_row)
    feature = np.array(feature)
    feature_row, feature_col = np.nonzero(feature)
    feature_values = feature[feature_row, feature_col]
    feature_elem_len = len(feature_row)
    features = Features(
        feature={
            'label':
            Feature(int64_list=Int64List(value=label_data)),
            'mask_label':
            Feature(int64_list=Int64List(value=label_mask)),
            'adj_row':
            Feature(int64_list=Int64List(value=list(adj_row))),
            'adj_column':
            Feature(int64_list=Int64List(value=list(adj_col))),
            'adj_values':
            Feature(float_list=FloatList(value=list(adj_values))),
            'adj_elem_len':
            Feature(int64_list=Int64List(value=[adj_elem_len])),
            'feature_row':
            Feature(int64_list=Int64List(value=list(feature_row))),
            'feature_column':
            Feature(int64_list=Int64List(value=list(feature_col))),
            'feature_values':
            Feature(float_list=FloatList(value=list(feature_values))),
            'feature_elem_len':
            Feature(int64_list=Int64List(value=[feature_elem_len])),
            'size':
            Feature(int64_list=Int64List(value=list(feature.shape)))
        })
    ex = Example(features=features)
    with TFRecordWriter(tfrname) as single_writer:
        single_writer.write(ex.SerializeToString())
示例#4
0
def get_cycle_example(cell_value, summary_idx, cycle_idx, scaling_factors):
    """
    Define the columns that should be written to tfrecords and converts the raw data
    to "Example" objects. Every Example contains data from one charging cycle.
    The data is scaled (divided) by the corresponding values in "scaling_factors".
    """
    # Summary feature values (scalars --> have to be wrapped in lists)
    ir_value = [
        cell_value["summary"][cst.INTERNAL_RESISTANCE_NAME][summary_idx] /
        scaling_factors[cst.INTERNAL_RESISTANCE_NAME]
    ]
    qd_value = [
        cell_value["summary"][cst.QD_NAME][summary_idx] /
        scaling_factors[cst.QD_NAME]
    ]
    rc_value = [
        cell_value["summary"][cst.REMAINING_CYCLES_NAME][summary_idx] /
        scaling_factors[cst.REMAINING_CYCLES_NAME]
    ]
    dt_value = [
        cell_value["summary"][cst.DISCHARGE_TIME_NAME][summary_idx] /
        scaling_factors[cst.DISCHARGE_TIME_NAME]
    ]
    cc_value = [float(cycle_idx) / scaling_factors[cst.REMAINING_CYCLES_NAME]
                ]  # Same scale --> same scaling factor

    # Detail feature values (arrays)
    qdlin_value = cell_value["cycles"][cycle_idx][
        cst.QDLIN_NAME] / scaling_factors[cst.QDLIN_NAME]
    tdlin_value = cell_value["cycles"][cycle_idx][
        cst.TDLIN_NAME] / scaling_factors[cst.TDLIN_NAME]

    # Wrapping as example
    cycle_example = Example(features=Features(
        feature={
            cst.INTERNAL_RESISTANCE_NAME:
            Feature(float_list=FloatList(value=ir_value)),
            cst.QD_NAME:
            Feature(float_list=FloatList(value=qd_value)),
            cst.REMAINING_CYCLES_NAME:
            Feature(float_list=FloatList(value=rc_value)),
            cst.DISCHARGE_TIME_NAME:
            Feature(float_list=FloatList(value=dt_value)),
            cst.QDLIN_NAME:
            Feature(float_list=FloatList(value=qdlin_value)),
            cst.TDLIN_NAME:
            Feature(float_list=FloatList(value=tdlin_value)),
            cst.CURRENT_CYCLE_NAME:
            Feature(float_list=FloatList(value=cc_value))
        }))
    return cycle_example