def input_fn_performance_toxicity(max_n_examples=None,
                                  random_filter_keep_rate=1.0):
     """Load the toxicity performance dataset in model-ready form.

     Decodes `TOXICITY_PERFORMANCE_DATASET` with `decoding_input_features`,
     stores the tokenized comments under `model_input_comment_field`, renames
     `TOXICITY_DATA_LABEL` to 'label' and turns every label into a bool.

     Args:
       max_n_examples: Forwarded as the third positional argument of
         `utils_tfrecords.decode_tf_records_to_pandas` (presumably a cap on
         the number of decoded examples -- confirm against the helper).
       random_filter_keep_rate: Forwarded as the fourth positional argument
         of the same helper (presumably the probability of keeping each
         record -- confirm against the helper).

     Returns:
       A deep copy of the processed frame.
     """
     frame = utils_tfrecords.decode_tf_records_to_pandas(
         decoding_input_features, TOXICITY_PERFORMANCE_DATASET,
         max_n_examples, random_filter_keep_rate)

     # Tokens are stored under `model_input_comment_field`; this function
     # neither renames nor drops the `TOXICITY_COMMENT_NAME` column.
     frame[model_input_comment_field] = [
         tokenizer(comment) for comment in frame[TOXICITY_COMMENT_NAME]
     ]

     frame = frame.rename(columns={TOXICITY_DATA_LABEL: 'label'})
     # NOTE(review): under Python 3, round() rounds ties to even, so a score
     # of exactly 0.5 becomes False -- confirm that this is intended.
     frame['label'] = [bool(round(score)) for score in frame['label']]
     return frame.copy(deep=True)
 def input_fn_biasbios(max_n_examples=None, random_filter_keep_rate=1.0):
     """Load the BiasBios dataset in model-ready form.

     Decodes `BIASBIOS_PATH` with `comments_spec`, keeping only the records
     accepted by `filter_fn_biasbios`, tokenizes the `COMMENT_NAME` column in
     place, then renames `COMMENT_NAME` to `model_input_comment_field` and
     `LABEL_NAME` to 'label'.

     Args:
       max_n_examples: Forwarded to
         `utils_tfrecords.decode_tf_records_to_pandas` (presumably a cap on
         the number of decoded examples -- confirm against the helper).
       random_filter_keep_rate: Forwarded to the same helper. It used to be
         accepted here but silently ignored; it is now passed through so the
         argument actually takes effect. The default of 1.0 is presumed to
         keep every record, matching the previous behaviour -- confirm
         against the helper.

     Returns:
       A deep copy of the processed frame.
     """
     frame = utils_tfrecords.decode_tf_records_to_pandas(
         comments_spec,
         BIASBIOS_PATH,
         max_n_examples=max_n_examples,
         random_filter_keep_rate=random_filter_keep_rate,
         filter_fn=filter_fn_biasbios,
     )
     frame[COMMENT_NAME] = [tokenizer(text) for text in frame[COMMENT_NAME]]
     frame = frame.rename(columns={
         COMMENT_NAME: model_input_comment_field,
         LABEL_NAME: 'label',
     })
     return frame.copy(deep=True)
 def input_fn_performance_civil(max_n_examples=None,
                                random_filter_keep_rate=1.0):
     """Load the civil comments performance dataset in model-ready form.

     Decodes `CIVIL_COMMENTS_PATH` with `civil_comments_spec`, tokenizes the
     `CIVIL_COMMENT_NAME` column in place, turns every 'toxicity' value into
     a bool, then renames `CIVIL_COMMENT_NAME` to `model_input_comment_field`
     and 'toxicity' to 'label'.

     Args:
       max_n_examples: Forwarded by keyword to
         `utils_tfrecords.decode_tf_records_to_pandas` (presumably a cap on
         the number of decoded examples -- confirm against the helper).
       random_filter_keep_rate: Forwarded by keyword to the same helper
         (presumably the probability of keeping each record -- confirm
         against the helper).

     Returns:
       A deep copy of the processed frame.
     """
     frame = utils_tfrecords.decode_tf_records_to_pandas(
         civil_comments_spec,
         CIVIL_COMMENTS_PATH,
         max_n_examples=max_n_examples,
         random_filter_keep_rate=random_filter_keep_rate)

     frame[CIVIL_COMMENT_NAME] = [
         tokenizer(text) for text in frame[CIVIL_COMMENT_NAME]
     ]
     # NOTE(review): under Python 3, round() rounds ties to even, so a score
     # of exactly 0.5 becomes False -- confirm that this is intended.
     frame['toxicity'] = [bool(round(score)) for score in frame['toxicity']]

     column_renames = {
         CIVIL_COMMENT_NAME: model_input_comment_field,
         'toxicity': 'label',
     }
     return frame.rename(columns=column_renames).copy(deep=True)
 def input_fn_bias_civil(max_n_examples=None):
   """Load the civil comments bias dataset in model-ready form.

   Decodes `CIVIL_COMMENTS_PATH` with `civil_comments_spec`, keeping only the
   records accepted by `filter_fn_civil`. The `CIVIL_COMMENT_NAME` column is
   tokenized in place, every column named in `identity_terms_civil` becomes
   a bool (value >= `THRESHOLD_BIAS_CIVIL`), and every 'toxicity' value
   becomes a bool. Finally `CIVIL_COMMENT_NAME` is renamed to
   `model_input_comment_field` and 'toxicity' to 'label'.

   Args:
     max_n_examples: Forwarded by keyword to
       `utils_tfrecords.decode_tf_records_to_pandas` (presumably a cap on
       the number of decoded examples -- confirm against the helper).

   Returns:
     A deep copy of the processed frame.
   """
   frame = utils_tfrecords.decode_tf_records_to_pandas(
       civil_comments_spec,
       CIVIL_COMMENTS_PATH,
       max_n_examples=max_n_examples,
       filter_fn=filter_fn_civil)

   frame[CIVIL_COMMENT_NAME] = [
       tokenizer(text) for text in frame[CIVIL_COMMENT_NAME]
   ]

   # Binarize each identity column against the shared threshold.
   for term in identity_terms_civil:
     frame[term] = [value >= THRESHOLD_BIAS_CIVIL for value in frame[term]]

   # NOTE(review): under Python 3, round() rounds ties to even, so a score
   # of exactly 0.5 becomes False -- confirm that this is intended.
   frame['toxicity'] = [bool(round(score)) for score in frame['toxicity']]

   column_renames = {
       CIVIL_COMMENT_NAME: model_input_comment_field,
       'toxicity': 'label',
   }
   return frame.rename(columns=column_renames).copy(deep=True)