Exemplo n.º 1
0
 def testBasicOps(self):
     """Runs several small programs with default settings and checks output."""
     hello_world_program = (
         '>++++++++[-<+++++++++>]<.>>+>-[+]++>++>+++[>[->+++<<+++>]<<]>-----'
         '.>->+++..+++.>-.<<+[>[+>+]>>]<--------------.>>.+++.------.-------'
         '-.>+.>+.')
     # Each case pairs the expected output with the program producing it.
     cases = [
         ([3, 1, 2], '+++.--.+.'),
         ([1, 1, 2], '+.<.>++.'),
         ([0], '+,.'),
         ([ord(char) for char in 'Hello World!\n'], hello_world_program),
     ]
     for expected_output, program in cases:
         self.assertCorrectOutput(expected_output, bf.evaluate(program))
Exemplo n.º 2
0
    def testTimeout(self):
        """Checks that `timeout` stops a long-running program and is reported.

        The first program is expected to end with Status.TIMEOUT after
        emitting one value; the second is expected to finish successfully
        well within the time limit.
        """
        er = bf.evaluate('+.[].', base=5, input_buffer=[], timeout=0.1)
        self.assertEqual(([1], False, bf.Status.TIMEOUT),
                         (er.output, er.success, er.failure_reason))
        # The measured run time should be near the 0.1 second limit. Dedicated
        # comparison asserts report the offending value when they fail.
        self.assertGreater(er.time, 0.07)
        self.assertLess(er.time, 0.21)

        er = bf.evaluate('+.[-].', base=5, input_buffer=[], timeout=0.1)
        self.assertEqual(([1, 0], True, bf.Status.SUCCESS),
                         (er.output, er.success, er.failure_reason))
        self.assertLess(er.time, 0.15)
Exemplo n.º 3
0
  def testMaxSteps(self):
    """Checks that `max_steps` stops execution and is reported in the result.

    The first program is expected to end with Status.STEP_LIMIT after exactly
    100 steps; the second is expected to finish successfully in fewer steps.
    """
    er = bf.evaluate('+.[].', base=5, input_buffer=[], timeout=None,
                     max_steps=100)
    self.assertEqual(
        ([1], False, bf.Status.STEP_LIMIT, 100),
        (er.output, er.success, er.failure_reason, er.steps))

    er = bf.evaluate('+.[-].', base=5, input_buffer=[], timeout=None,
                     max_steps=100)
    self.assertEqual(
        ([1, 0], True, bf.Status.SUCCESS),
        (er.output, er.success, er.failure_reason))
    # assertLess reports the actual step count if the check fails.
    self.assertLess(er.steps, 100)
Exemplo n.º 4
0
    def testUnmatchedBraces(self):
        """Runs a program with unbalanced brackets in both syntax modes.

        With require_correct_syntax=False the program is expected to produce
        output; with require_correct_syntax=True it is expected to fail with
        Status.SYNTAX_ERROR and no output.
        """
        program = '+++.]]]]>----.[[[[[>+.'

        lenient_result = bf.evaluate(
            program, input_buffer=[], base=10, require_correct_syntax=False)
        self.assertCorrectOutput([3, 6, 1], lenient_result)

        strict_result = bf.evaluate(
            program, input_buffer=[], base=10, require_correct_syntax=True)
        self.assertEqual([], strict_result.output)
        self.assertFalse(strict_result.success)
        self.assertEqual(bf.Status.SYNTAX_ERROR, strict_result.failure_reason)
Exemplo n.º 5
0
 def testOutputMemory(self):
   """Checks that output_memory=True exposes the memory in the result."""
   result = bf.evaluate(
       '+>++>+++>++++.', base=256, input_buffer=[], output_memory=True)
   expected_summary = ([4], True, bf.Status.SUCCESS)
   actual_summary = (result.output, result.success, result.failure_reason)
   self.assertEqual(expected_summary, actual_summary)
   self.assertEqual([1, 2, 3, 4], result.memory)
Exemplo n.º 6
0
 def testProgramTrace(self):
   """Checks the snapshots recorded in program_trace when debug=True."""
   snapshot = bf.ExecutionSnapshot
   result = bf.evaluate(',[.>,].', base=256, input_buffer=[2, 1], debug=True)
   # One row per expected snapshot, in this column order:
   # (codeptr, codechar, memptr, memval, memory, next_input, output_buffer)
   expected_rows = [
       (0, ',', 0, 0, [0], 2, []),
       (1, '[', 0, 2, [2], 1, []),
       (2, '.', 0, 2, [2], 1, []),
       (3, '>', 0, 2, [2], 1, [2]),
       (4, ',', 1, 0, [2, 0], 1, [2]),
       (5, ']', 1, 1, [2, 1], 0, [2]),
       (2, '.', 1, 1, [2, 1], 0, [2]),
       (3, '>', 1, 1, [2, 1], 0, [2, 1]),
       (4, ',', 2, 0, [2, 1, 0], 0, [2, 1]),
       (5, ']', 2, 0, [2, 1, 0], 0, [2, 1]),
       (6, '.', 2, 0, [2, 1, 0], 0, [2, 1]),
       (7, '', 2, 0, [2, 1, 0], 0, [2, 1, 0]),
   ]
   expected_trace = [
       snapshot(codeptr=codeptr, codechar=codechar, memptr=memptr,
                memval=memval, memory=memory, next_input=next_input,
                output_buffer=output_buffer)
       for (codeptr, codechar, memptr, memval, memory, next_input,
            output_buffer) in expected_rows]
   self.assertEqual(expected_trace, result.program_trace)
 def _test_case_generator(self, code_solution):
   """Yields (input, output) pairs obtained by running a reference program.

   Inputs are produced by self.make_input_fn using a random.Random instance
   seeded with self.seed; self.n cases are generated.

   Args:
     code_solution: BF code string run on each generated input to obtain the
         corresponding output.

   Yields:
     2-tuples (input_case, output), where output is the output of
     code_solution on input_case.

   Raises:
     RuntimeError: If code_solution does not run successfully on an input.
   """
   rand = random.Random(self.seed)
   for _ in xrange(self.n):
     input_case = self.make_input_fn(rand)
     result = bf.evaluate(
         code_solution, input_buffer=input_case, max_steps=self.max_steps,
         base=self.base, require_correct_syntax=False)
     if not result.success:
       # Wrap the value in a 1-tuple so that a tuple-valued input_case is
       # formatted as one value instead of being unpacked as format arguments.
       raise RuntimeError(
           'Program must succeed. Failed on input: %s' % (input_case,))
     yield input_case, result.output
Exemplo n.º 8
0
 def _test_case_generator(self, code_solution):
   """Yields (input, output) pairs obtained by running a reference program.

   Inputs are produced by self.make_input_fn using a random.Random instance
   seeded with self.seed; self.n cases are generated.

   Args:
     code_solution: BF code string run on each generated input to obtain the
         corresponding output.

   Yields:
     2-tuples (input_case, output), where output is the output of
     code_solution on input_case.

   Raises:
     RuntimeError: If code_solution does not run successfully on an input.
   """
   rand = random.Random(self.seed)
   for _ in xrange(self.n):
     input_case = self.make_input_fn(rand)
     result = bf.evaluate(
         code_solution, input_buffer=input_case, max_steps=self.max_steps,
         base=self.base, require_correct_syntax=False)
     if not result.success:
       # Wrap the value in a 1-tuple so that a tuple-valued input_case is
       # formatted as one value instead of being unpacked as format arguments.
       raise RuntimeError(
           'Program must succeed. Failed on input: %s' % (input_case,))
     yield input_case, result.output
  def _score_code(self, code):
    """Evaluate `code` on the task's test cases and build its reward info.

    Args:
      code: A single BF code string.

    Returns:
      misc.RewardInfo namedtuple instance holding the per-character rewards
          (all zero except the last one), the test inputs, the expected
          outputs, the outputs produced by `code`, the input and output
          types, and the reason for the reward obtained.
    """
    # Each element pairs an input sequence with its expected output sequence.
    io_seqs = self.task.make_io_set()
    reason = 'correct'
    code_outputs = []
    total_reward = 0.0
    for test_input, expected_output in io_seqs:
      run = bf.evaluate(
          code,
          input_buffer=test_input,
          timeout=0.1,
          max_steps=self.max_execution_steps,
          base=self.task.base,
          require_correct_syntax=self.require_correct_syntax)
      actual_output = run.output
      if not run.success:
        # Execution failed; the evaluator's failure_reason says why. Replace
        # everything accumulated so far with the failure reward and stop.
        total_reward = self.failure_reward
        code_outputs = []
        reason = run.failure_reason
        break

      total_reward += self.reward_fn(
          actual_output, expected_output, self.task.base)
      if actual_output == expected_output:
        total_reward += self.correct_bonus  # Bonus for an exact match.

        # Code length is only ever rewarded, never penalized, and only on
        # test cases that are already solved, so it cannot compete with the
        # main objective.
        if self.min_code_length != self.max_code_length:
          total_reward += self.code_length_bonus * clipped_linear(
              x=len(code), x0=self.min_code_length, y0=1.0,
              slope=-self.time_penalty, y_range=(0.0, 1.0))
        else:
          total_reward += self.code_length_bonus
      elif reason == 'correct':
        # First mismatch seen: downgrade the overall verdict.
        reason = 'wrong'
      code_outputs.append(actual_output)

    # One reward per character of the code: zeros everywhere except the final
    # position, which carries the normalized total.
    total_reward /= self.best_reward
    leading_zeros = [0.0] * (len(code) - 1)
    return misc.RewardInfo(
        episode_rewards=leading_zeros + [total_reward],
        input_case=misc.IOTuple(seq_in for seq_in, _ in io_seqs),
        correct_output=misc.IOTuple(seq_out for _, seq_out in io_seqs),
        code_output=misc.IOTuple(code_outputs),
        input_type=self.input_type,
        output_type=self.output_type,
        reason=reason)
Exemplo n.º 10
0
 def testBadChars(self):
     """Runs a program that mixes plain text in between its commands."""
     program_with_text = '>,[>,]hello<world[.<]comments'
     result = bf.evaluate(program_with_text, input_buffer=[4, 3, 2])
     self.assertCorrectOutput([2, 3, 4], result)
Exemplo n.º 11
0
 def testInputBuffer(self):
     """Feeds [4, 3, 2] as input and expects it echoed back reversed."""
     result = bf.evaluate('>,[>,]<[.<]', input_buffer=[4, 3, 2])
     self.assertCorrectOutput([2, 3, 4], result)
Exemplo n.º 12
0
 def testBase(self):
     """Runs a short program with base=5 and checks the emitted values."""
     result = bf.evaluate('+.--.', base=5, input_buffer=[])
     self.assertCorrectOutput([1, 4], result)
Exemplo n.º 13
0
  def _score_code(self, code):
    """Evaluate `code` on the task's test cases and build its reward info.

    Args:
      code: A single BF code string.

    Returns:
      misc.RewardInfo namedtuple instance holding the per-character rewards
          (all zero except the last one), the test inputs, the expected
          outputs, the outputs produced by `code`, the input and output
          types, and the reason for the reward obtained.
    """
    # Each element pairs an input sequence with its expected output sequence.
    io_seqs = self.task.make_io_set()
    reason = 'correct'
    code_outputs = []
    total_reward = 0.0
    for test_input, expected_output in io_seqs:
      run = bf.evaluate(
          code,
          input_buffer=test_input,
          timeout=0.1,
          max_steps=self.max_execution_steps,
          base=self.task.base,
          require_correct_syntax=self.require_correct_syntax)
      actual_output = run.output
      if not run.success:
        # Execution failed; the evaluator's failure_reason says why. Replace
        # everything accumulated so far with the failure reward and stop.
        total_reward = self.failure_reward
        code_outputs = []
        reason = run.failure_reason
        break

      total_reward += self.reward_fn(
          actual_output, expected_output, self.task.base)
      if actual_output == expected_output:
        total_reward += self.correct_bonus  # Bonus for an exact match.

        # Code length is only ever rewarded, never penalized, and only on
        # test cases that are already solved, so it cannot compete with the
        # main objective.
        if self.min_code_length != self.max_code_length:
          total_reward += self.code_length_bonus * clipped_linear(
              x=len(code), x0=self.min_code_length, y0=1.0,
              slope=-self.time_penalty, y_range=(0.0, 1.0))
        else:
          total_reward += self.code_length_bonus
      elif reason == 'correct':
        # First mismatch seen: downgrade the overall verdict.
        reason = 'wrong'
      code_outputs.append(actual_output)

    # One reward per character of the code: zeros everywhere except the final
    # position, which carries the normalized total.
    total_reward /= self.best_reward
    leading_zeros = [0.0] * (len(code) - 1)
    return misc.RewardInfo(
        episode_rewards=leading_zeros + [total_reward],
        input_case=misc.IOTuple(seq_in for seq_in, _ in io_seqs),
        correct_output=misc.IOTuple(seq_out for _, seq_out in io_seqs),
        code_output=misc.IOTuple(code_outputs),
        input_type=self.input_type,
        output_type=self.output_type,
        reason=reason)