def main(argv):
  """Aggregate per-graph and per-label statistics into CSV files.

  Writes `graph_stats.csv` (one section per dataset split, when --graphs is
  set) and `label_stats.csv` (one section per --analysis) under --path.

  Args:
    argv: Command-line arguments; must contain only the program name.

  Raises:
    app.UsageError: If any unrecognized arguments are present.
  """
  if len(argv) != 1:
    raise app.UsageError(f"Unrecognized arguments: {argv[1:]}")
  path = pathlib.Path(pathflag.path())

  if FLAGS.graphs:
    # newline="" is required by the csv module so the writer controls line
    # endings itself (avoids blank rows on Windows).
    with open(path / "graph_stats.csv", "w", newline="") as f:
      writer = csv.writer(f, delimiter=",")
      logging.info("Aggregating graph stats")
      # Fix: write the CSV header only for the first split. The original
      # passed write_header=True for all three splits, producing duplicate
      # header rows in the middle of the file (compare the label-stats loop
      # below, which already uses the `not i` pattern).
      for i, split in enumerate(["test", "val", "train"]):
        progress.Run(
            CollectGraphStats(path, split, writer, write_header=not i))

  with open(path / "label_stats.csv", "w", newline="") as f:
    writer = csv.writer(f, delimiter=",")
    for i, analysis in enumerate(FLAGS.analysis):
      logging.info("Aggregating %s stats", analysis)
      progress.Run(
          CollectAnalysisStats(path, analysis, writer, write_header=not i))
def main(argv):
  """Build the dataset: export IRs, import graphs, encode, and label them."""
  init_app(argv)
  path = pathlib.Path(pathflag.path())

  db = _mysql.connect(
      host=FLAGS.host, user=FLAGS.user, passwd=FLAGS.pwd, db=FLAGS.db)

  # Create the output directory layout up front. mkdir() raises if a
  # directory already exists, so we fail fast instead of clobbering a
  # previous export.
  (path / "ir").mkdir(parents=True)
  for subdir in ("graphs", "train", "val", "test"):
    (path / subdir).mkdir()

  # Export the legacy IR database.
  progress.Run(ImportIrDatabase(path, db))

  # Import the classifyapp dataset.
  ImportClassifyAppDataset(pathlib.Path(FLAGS.classifyapp), path)

  # Add inst2vec encoding features to graphs.
  logging.info("Encoding graphs with inst2vec")
  progress.Run(Inst2vecEncodeGraphs(path))

  logging.info("Creating vocabularies")
  subprocess.check_call([str(CREATE_VOCAB), "--path", str(path)])

  logging.info("Creating data flow analysis labels")
  subprocess.check_call([str(CREATE_LABELS), str(path)])
def TestOne(
    features_list_path: Path,
    features_list_index: int,
    checkpoint_path: Path,
) -> BatchResults:
  """Run a restored GGNN on a single graph and annotate it with the results.

  Args:
    features_list_path: Path to a `.ProgramGraphFeaturesList.pb` file.
    features_list_index: Index of the graph features to use within that list.
    checkpoint_path: Path to a model checkpoint proto to restore.

  Returns:
    The graph annotated with the batch results.
  """
  path = Path(pathflag.path())

  # Load the features list and select the requested entry.
  features_list = pbutil.FromFile(
      features_list_path,
      program_graph_features_pb2.ProgramGraphFeaturesList(),
  )
  features = features_list.graph[features_list_index]

  # The graph proto shares the features file's base name.
  suffix = ".ProgramGraphFeaturesList.pb"
  graph_name = features_list_path.name[:-len(suffix)]
  graph = pbutil.FromFile(
      path / "graphs" / f"{graph_name}.ProgramGraph.pb",
      program_graph_pb2.ProgramGraph(),
  )

  # Instantiate and restore the model.
  vocab = vocabulary.LoadVocabulary(
      path,
      model_name="cdfg" if FLAGS.cdfg else "programl",
      max_items=FLAGS.max_vocab_size,
      target_cumfreq=FLAGS.target_vocab_cumfreq,
  )
  if FLAGS.cdfg:
    # CDFG mode disables positional embeddings globally before model build.
    FLAGS.use_position_embeddings = False

  model = Ggnn(
      vocabulary=vocab,
      test_only=True,
      node_y_dimensionality=2,
      graph_y_dimensionality=0,
      graph_x_dimensionality=0,
      use_selector_embeddings=True,
  )
  checkpoint = pbutil.FromFile(checkpoint_path, checkpoint_pb2.Checkpoint())
  model.RestoreCheckpoint(checkpoint)

  # Build exactly one batch containing only this graph.
  batch_builder = DataflowGgnnBatchBuilder(
      graph_loader=SingleGraphLoader(graph=graph, features=features),
      vocabulary=vocab,
      max_node_size=int(1e9),
      use_cdfg=FLAGS.cdfg,
      max_batch_count=1,
  )
  batch = list(batch_builder)[0]

  results = model.RunBatch(epoch_pb2.TEST, batch)
  return AnnotateGraphWithBatchResults(graph, features, results)
def main(argv):
  """Run the vocabulary test over the dataset rooted at --path."""
  init_app(argv)
  dataset_root = pathlib.Path(pathflag.path())
  progress.Run(TestVocab(dataset_root))
def main():
  """Main entry point: build the devmap dataset under --path."""
  dataset_root = Path(pathflag.path())
  create_devmap_dataset(dataset_root)
def main(argv):
  """Add inst2vec encoding features to the graphs under --path.

  Args:
    argv: Command-line arguments; must contain only the program name.

  Raises:
    app.UsageError: If any unrecognized arguments are present.
  """
  if len(argv) != 1:
    raise app.UsageError(f"Unrecognized arguments: {argv[1:]}")
  dataset_root = pathlib.Path(pathflag.path())
  progress.Run(Inst2vecEncodeGraphs(dataset_root))