def input_fn_performance_toxicity(max_n_examples=None, random_filter_keep_rate=1.0):
    """Build the toxicity performance frame in the shape the model consumes.

    Decodes ``TOXICITY_PERFORMANCE_DATASET`` using ``decoding_input_features``,
    writes the tokenized comments to ``model_input_comment_field`` and exposes
    the ``TOXICITY_DATA_LABEL`` column as a boolean ``'label'`` column.

    Args:
        max_n_examples: Passed positionally (third argument) to
            ``utils_tfrecords.decode_tf_records_to_pandas``; presumably an
            upper bound on the number of decoded examples.
        random_filter_keep_rate: Passed positionally (fourth argument) to the
            decoder; presumably the fraction of records kept at random.

    Returns:
        A deep copy of the processed frame.
    """
    frame = utils_tfrecords.decode_tf_records_to_pandas(
        decoding_input_features,
        TOXICITY_PERFORMANCE_DATASET,
        max_n_examples,
        random_filter_keep_rate,
    )
    # Tokens are written to the model input column; the source comment column
    # itself is not renamed here.
    frame[model_input_comment_field] = [
        tokenizer(comment) for comment in frame[TOXICITY_COMMENT_NAME]
    ]
    frame = frame.rename(columns={TOXICITY_DATA_LABEL: 'label'})
    # Each label value is rounded to the nearest integer, then cast to bool.
    frame['label'] = [bool(round(score)) for score in frame['label']]
    return frame.copy(deep=True)
def input_fn_biasbios(max_n_examples=None, random_filter_keep_rate=1.0):
    """Build the bias-in-bios frame in the shape the model consumes.

    Decodes ``BIASBIOS_PATH`` using ``comments_spec`` and
    ``filter_fn_biasbios``, tokenizes the ``COMMENT_NAME`` column in place,
    then renames it to ``model_input_comment_field`` and renames
    ``LABEL_NAME`` to ``'label'``.

    Args:
        max_n_examples: Forwarded to
            ``utils_tfrecords.decode_tf_records_to_pandas``; presumably an
            upper bound on the number of decoded examples.
        random_filter_keep_rate: Forwarded to the decoder; presumably the
            fraction of records kept at random. With the default of 1.0 the
            call is equivalent to omitting the argument, assuming the
            decoder's own default is also 1.0 (TODO confirm).

    Returns:
        A deep copy of the processed frame.
    """
    bios = utils_tfrecords.decode_tf_records_to_pandas(
        comments_spec,
        BIASBIOS_PATH,
        max_n_examples=max_n_examples,
        # Previously accepted by this function but never passed on, so a
        # caller-supplied keep rate was silently ignored.
        random_filter_keep_rate=random_filter_keep_rate,
        filter_fn=filter_fn_biasbios,
    )
    bios[COMMENT_NAME] = [tokenizer(text) for text in bios[COMMENT_NAME]]
    bios = bios.rename(columns={
        COMMENT_NAME: model_input_comment_field,
        LABEL_NAME: 'label',
    })
    return bios.copy(deep=True)
def input_fn_performance_civil(max_n_examples=None, random_filter_keep_rate=1.0):
    """Build the civil-comments performance frame the model consumes.

    Decodes ``CIVIL_COMMENTS_PATH`` using ``civil_comments_spec``, tokenizes
    the ``CIVIL_COMMENT_NAME`` column in place, converts ``'toxicity'`` to
    booleans, then renames the two columns to ``model_input_comment_field``
    and ``'label'`` respectively.

    Args:
        max_n_examples: Forwarded to
            ``utils_tfrecords.decode_tf_records_to_pandas``; presumably an
            upper bound on the number of decoded examples.
        random_filter_keep_rate: Forwarded to the decoder; presumably the
            fraction of records kept at random.

    Returns:
        A deep copy of the processed frame.
    """
    comments = utils_tfrecords.decode_tf_records_to_pandas(
        civil_comments_spec,
        CIVIL_COMMENTS_PATH,
        max_n_examples=max_n_examples,
        random_filter_keep_rate=random_filter_keep_rate,
    )
    comments[CIVIL_COMMENT_NAME] = [
        tokenizer(text) for text in comments[CIVIL_COMMENT_NAME]
    ]
    # Each toxicity value is rounded to the nearest integer, then cast to bool.
    comments['toxicity'] = [
        bool(round(score)) for score in comments['toxicity']
    ]
    column_names = {
        CIVIL_COMMENT_NAME: model_input_comment_field,
        'toxicity': 'label',
    }
    comments = comments.rename(columns=column_names)
    return comments.copy(deep=True)
def input_fn_bias_civil(max_n_examples=None):
    """Build the civil-comments bias frame the model consumes.

    Decodes ``CIVIL_COMMENTS_PATH`` using ``civil_comments_spec`` and
    ``filter_fn_civil``, tokenizes the ``CIVIL_COMMENT_NAME`` column in place,
    turns every column listed in ``identity_terms_civil`` into booleans,
    converts ``'toxicity'`` to booleans, then renames the comment and toxicity
    columns to ``model_input_comment_field`` and ``'label'`` respectively.

    Args:
        max_n_examples: Forwarded to
            ``utils_tfrecords.decode_tf_records_to_pandas``; presumably an
            upper bound on the number of decoded examples.

    Returns:
        A deep copy of the processed frame.
    """
    comments = utils_tfrecords.decode_tf_records_to_pandas(
        civil_comments_spec,
        CIVIL_COMMENTS_PATH,
        max_n_examples=max_n_examples,
        filter_fn=filter_fn_civil,
    )
    comments[CIVIL_COMMENT_NAME] = [
        tokenizer(text) for text in comments[CIVIL_COMMENT_NAME]
    ]
    # An identity column is True when its value reaches THRESHOLD_BIAS_CIVIL.
    for identity in identity_terms_civil:
        comments[identity] = [
            value >= THRESHOLD_BIAS_CIVIL for value in comments[identity]
        ]
    # Each toxicity value is rounded to the nearest integer, then cast to bool.
    comments['toxicity'] = [
        bool(round(score)) for score in comments['toxicity']
    ]
    column_names = {
        CIVIL_COMMENT_NAME: model_input_comment_field,
        'toxicity': 'label',
    }
    comments = comments.rename(columns=column_names)
    return comments.copy(deep=True)