Пример #1
0
def test_export_test_db(
  immutable_test_db: unlabelled_graph_database.Database,
  tempdir: pathlib.Path,
  fmt: programl.StdoutGraphFormat,
):
  """Exporting the test database writes one file per proto, and every
  exported file (except dot, which has no reader) parses back into a graph.
  """
  export_dir = tempdir / "graphs"
  progress.Run(
    unlabelled_graph_database_exporter.GraphDatabaseExporter(
      db=immutable_test_db, outdir=export_dir, fmt=fmt,
    )
  )
  assert export_dir.is_dir()
  exported_files = list(export_dir.iterdir())
  assert len(exported_files) == IMMUTABLE_TEST_DB_PROTO_COUNT

  # We can't convert from dot -> graph, so end the test here.
  if fmt == programl.StdoutGraphFormat.DOT:
    return

  # Parse the dumped files.
  stdin_fmt = programl.StdoutGraphFormatToStdinGraphFormat(fmt)
  for graph_file in exported_files:
    programl.FromBytes(graph_file.read_bytes(), stdin_fmt)
Пример #2
0
def ProcessWorker(packed_args) -> AnnotationResult:
    """The process pool worker function.

    Accepts a batch of unlabelled graphs as inputs, labels them, and returns
    a list of graph tuples.

    Args:
      packed_args: A 5-tuple of
        (worker id, max memory size in bytes, analysis name,
        list of ProgramGraphProto, progress bar context), as produced by
        ProcessWorkerArgsGenerator().

    Returns:
      An AnnotationResult with the wall-clock runtime, the number of input
      protos, and one GraphTuple per annotated graph (or an empty GraphTuple
      for inputs that produced no output or failed).
    """
    start_time = time.time()

    # Unpack the args generated by ProcessWorkerArgsGenerator().
    # Index into the tuple rather than arg unpacking so that we can assign
    # type annotations.
    worker_id: str = f"{packed_args[0]:06d}"
    max_mem_size: int = packed_args[1]
    analysis: str = packed_args[2]
    program_graphs: List[ProgramGraphProto] = packed_args[3]
    ctx: progress.ProgressBarContext = packed_args[4]

    # Set the hard limit on the memory size. Exceeding this limit will raise
    # a MemoryError.
    if FLAGS.limit_worker_mem:
        resource.setrlimit(resource.RLIMIT_DATA, (max_mem_size, max_mem_size))
        resource.setrlimit(resource.RLIMIT_AS, (max_mem_size, max_mem_size))

    # Accumulator for the batch's outputs; also read by the profiling lambda
    # below, which runs after the loop completes.
    graph_tuples = []

    ctx.Log(
        2,
        "[worker %s] received %s unlabelled graphs to process",
        worker_id,
        len(program_graphs),
    )

    with ctx.Profile(
            2,
            lambda t:
        (f"[worker {worker_id}] processed {len(program_graphs)} protos "
         f"({len(graph_tuples)} graphs, {humanize.Duration(t / len(program_graphs))} /proto)"
         ),
    ):
        for i, program_graph in enumerate(program_graphs):
            try:
                # Decode the serialized proto and run the requested dataflow
                # analysis on it, producing up to FLAGS.n labelled graphs.
                annotated_graphs = annotate.Annotate(
                    analysis,
                    programl.FromBytes(program_graph.serialized_proto,
                                       programl.StdinGraphFormat.PB),
                    n=FLAGS.n,
                    timeout=FLAGS.annotator_timeout,
                )

                if annotated_graphs.graphs:
                    # Record the annotated analysis results.
                    for annotated_graph in annotated_graphs.graphs:
                        graph_tuples.append(
                            graph_tuple_database.GraphTuple.CreateFromNetworkX(
                                annotated_graph, ir_id=program_graph.ir_id))
                else:
                    # Analysis produced no outputs, so just record an empty graph.
                    graph_tuples.append(
                        graph_tuple_database.GraphTuple.CreateEmpty(
                            ir_id=program_graph.ir_id))

            except Exception as e:
                # Deliberately broad: a single bad graph (including a
                # MemoryError from the rlimit above) must not kill the batch.
                # Log the failure site and record an empty graph placeholder.
                _, _, tb = sys.exc_info()
                tb = traceback.extract_tb(tb, 2)
                filename, line_number, function_name, *_ = tb[-1]
                filename = pathlib.Path(filename).name
                ctx.Error(
                    "Failed to annotate graph for ProgramGraph.ir_id=%d: %s "
                    "(%s:%s:%s() -> %s)",
                    program_graph.ir_id,
                    e,
                    filename,
                    line_number,
                    function_name,
                    type(e).__name__,
                )
                graph_tuples.append(
                    graph_tuple_database.GraphTuple.CreateEmpty(
                        ir_id=program_graph.ir_id))

    return AnnotationResult(
        runtime=time.time() - start_time,
        proto_count=len(program_graphs),
        graph_tuples=graph_tuples,
    )
Пример #3
0
def _AnnotateInSubprocess(
    analysis: str,
    graph: Union[programl_pb2.ProgramGraph, bytes],
    n: int = 0,
    timeout: int = 120,
    binary_graph: bool = False,
) -> programl_pb2.ProgramGraphs:
    """Run this script in a subprocess.

    This is the most robust method for enforcing the timeout, but has a huge
    overhead in starting up a new python interpreter for every invocation.

    DISCLAIMER: Because a target cannot depend on itself, all calling code must
    add //deeplearning/ml4pl/graphs/labelled/dataflow:annotate to its list of
    data dependencies.

    Args:
      analysis: The name of the analysis to run.
      graph: The unlabelled ProgramGraph protocol buffer to annotate, either
        as a proto instance or as binary-encoded byte array.
      n: The maximum number of labelled graphs to produce.
      timeout: The maximum number of seconds to run the analysis for.
      binary_graph: If true, treat the graph argument as a binary byte array.

    Returns:
      A ProgramGraphs protocol buffer.

    Raises:
      IOError: If serializing the input or output protos fails.
      ValueError: If an invalid analysis is requested.
      data_flow_graphs.AnalysisFailed: If the analysis raised an error.
      data_flow_graphs.AnalysisTimeout: If the analysis did not complete within
        the requested timeout.
    """
    # Wrap the re-invocation of this script in `timeout -s9` so that the
    # analysis is hard-killed (SIGKILL) if it exceeds the deadline.
    process = subprocess.Popen(
        [
            "timeout",
            "-s9",
            str(timeout),
            str(SELF),
            "--analysis",
            analysis,
            "--n",
            str(n),
            "--stdin_fmt",
            "pb",
            "--stdout_fmt",
            "pb",
        ],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Encode the input if required.
    if binary_graph:
        stdin = graph
    else:
        stdin = programl.ToBytes(graph, fmt=programl.StdoutGraphFormat.PB)

    # Run this analysis script.
    stdout, stderr = process.communicate(stdin)

    if process.returncode == 9 or process.returncode == -9:
        # Process was killed. We assume this is because of timeout, though it could
        # be the user.
        # NOTE(review): GNU `timeout -s9` conventionally exits with 137
        # (128+SIGKILL) when the child is killed, and Popen reports -9 only
        # when `timeout` itself receives SIGKILL. Verify that 9/-9 are the
        # codes actually observed here, or timeouts may fall through to the
        # AnalysisFailed branch below.
        raise data_flow_graphs.AnalysisTimeout(timeout)
    elif process.returncode == E_INVALID_INPUT:
        raise IOError("Failed to serialize input graph")
    elif process.returncode == E_INVALID_STDOUT:
        raise IOError("Analysis failed to write stdout")
    elif process.returncode:
        # Any other non-zero exit is treated as an analysis error; surface
        # the subprocess's stderr to the caller for diagnosis.
        raise data_flow_graphs.AnalysisFailed(
            f"Analysis failed with returncode {process.returncode}: "
            f"{stderr.decode('utf-8')}")

    # Construct the protocol buffer from stdout.
    # empty_okay=True: an analysis may legitimately emit zero graphs.
    output = programl.FromBytes(
        stdout,
        programl.StdinGraphFormat.PB,
        proto=programl_pb2.ProgramGraphs(),
        empty_okay=True,
    )

    return output
Пример #4
0
def test_fuzz_proto_bytes_equivalence(fmt: programl.InputOutputFormat):
    """Test that conversion to and from bytes does not change the proto.

    Generates a random ProgramGraph proto, serializes it to bytes in the
    given format, deserializes it back, and asserts the round trip is
    lossless.
    """
    # Renamed from `input`, which shadowed the builtin of the same name.
    original_proto = random_programl_generator.CreateRandomProto()
    round_tripped = programl.FromBytes(
        programl.ToBytes(original_proto, fmt), fmt
    )
    assert original_proto == round_tripped