Пример #1
0
def preprocess_example(example: RawExample, funcall_format: str) -> RawExample:
    """Builds a preprocessed copy of `example` for the given funcall format.

    Only the "top" format is handled. The input is deep-copied before any
    field is rewritten, so the caller's `example` is left untouched.

    Args:
        example: The raw example; must contain an "output_str" entry and may
            contain a "tokens" entry.
        funcall_format: Name of the funcall format; only "top" is accepted.

    Returns:
        A new example whose "output_str" has been passed through
        `top_utils.format_serialized` and, when "tokens" is present, whose
        "input_str" is the space-joined tokens.

    Raises:
        ValueError: If `funcall_format` is anything other than "top".
    """
    # Reject unsupported formats up front, before doing any copying.
    if funcall_format != "top":
        raise ValueError(f"Unknown funcall_format: {funcall_format}")

    processed = copy.deepcopy(example)
    # Older JSONL files: rebuild `input_str` from the tokens so that it
    # matches the official tokenization.
    if "tokens" in processed:
        processed["input_str"] = " ".join(processed["tokens"])
    # Replace the serialized logical form with its formatted version.
    raw_output = processed["output_str"]
    processed["output_str"] = top_utils.format_serialized(raw_output)
    return processed
Пример #2
0
 def test_format(self):
     """Checks format / deformat round trips on a nested logical form."""
     raw_lf = (
         "[IN:GET_CALL_TIME [SL:CONTACT "
         "[IN:GET_CONTACT [SL:TYPE_RELATION Mum ] ] ] "
         "[SL:DATE_TIME yesterday evening ] ]"
     )
     pretty_lf = (
         "[IN get call time = [SL contact = "
         "[IN get contact = [SL type relation = Mum]]] "
         "[SL date time = yesterday evening]]"
     )

     # Serialized -> formatted.
     actual_pretty = top_utils.format_serialized(raw_lf)
     self.assertEqual(pretty_lf, actual_pretty)

     # Formatted -> serialized.
     self.assertEqual(raw_lf, top_utils.deformat_serialized(actual_pretty))

     # Deserializing the formatted string directly and serializing the
     # result should also give back the original serialized string.
     parsed = top_utils.deserialize_top(actual_pretty)
     self.assertEqual(raw_lf, parsed.serialize())
Пример #3
0
def top_funcall_processor(exemplar_outputs, orig_output, config):
    """Processes exemplar outputs and the original output as logical forms.

    Every output string is deserialized with `top_utils.deserialize_top`,
    optionally passed through label renaming and anonymization, and then
    serialized and formatted back into a string.

    Args:
        exemplar_outputs: Iterable of exemplar output strings.
        orig_output: The original output string; handled together with the
            exemplars so the same label processing is applied to it.
        config: Object exposing `rename_labels`, `anonymize` and
            `anonymized_labels_type`.

    Returns:
        A pair `(exemplar_outputs, orig_output)`: the list of processed
        exemplar strings and the processed original string.

    Raises:
        ValueError: If any output string deserializes to None.
    """
    # Parse everything into logical forms; the original output goes last.
    top_lfs = []
    for output_str in [*exemplar_outputs, orig_output]:
        parsed = top_utils.deserialize_top(output_str)
        if parsed is None:
            raise ValueError(f"Cannot deserialize {output_str}")
        top_lfs.append(parsed)

    # Optional label processing. The helpers' return values are not used, so
    # they presumably modify the logical forms in place (confirm in helpers).
    if config.rename_labels:
        rename_top_lf_labels(top_lfs, config.rename_labels)
    if config.anonymize:
        anonymize_top_lf_labels(
            top_lfs, anonymized_labels_type=config.anonymized_labels_type)

    # Back to formatted strings; peel the original output off the end again.
    *processed_exemplars, processed_orig = (
        top_utils.format_serialized(lf.serialize()) for lf in top_lfs)
    return processed_exemplars, processed_orig
Пример #4
0
def _format_label(label):
  """Formats a bare label, e.g. 'IN:SET_NAME' becomes 'IN set name ='."""
  # The label is prefixed with "[" before being handed to the formatter,
  # and any leading "[" characters are stripped from the result.
  bracketed = "[" + label
  formatted = top_utils.format_serialized(bracketed)
  return formatted.lstrip("[")