示例#1
0
文件: run.py 项目: SpringRi/phd
def main(argv):
    """Work through the CLgen instances listed in --instances.

    Instances are loaded from the file named by FLAGS.instances, shuffled,
    and processed from a queue. An eligible instance is sampled and its
    sample corpus postprocessed; an ineligible one is put back at the end
    of the queue after a one second pause.

    Args:
        argv: Command line arguments. Anything beyond the first is rejected.

    Raises:
        app.UsageError: If more than one argument is given.
    """
    if len(argv) > 1:
        unknown = ' '.join(argv[1:])
        raise app.UsageError("Unknown arguments: '{}'.".format(unknown))

    load_started = time.time()
    instances_proto = pbutil.FromFile(
        pathlib.Path(FLAGS.instances), clgen_pb2.Instances())
    loaded = [clgen.Instance(config) for config in instances_proto.instance]
    random.shuffle(loaded)
    queue = collections.deque(loaded)
    logging.info('Loaded %d instances in %s ms', len(queue),
                 humanize.intcomma(int((time.time() - load_started) * 1000)))

    while queue:
        candidate = queue.popleft()
        with candidate.Session():
            if not IsEligible(candidate):
                # Not ready yet: requeue at the back and pause before
                # moving on to the next candidate.
                logging.info('Candidate is ineligible')
                queue.append(candidate)
                time.sleep(1)
                continue
            logging.info('Found an eligible candidate to work on')
            SampleModel(candidate)
            PostprocessSampleCorpus(candidate)

    logging.info('Done.')
示例#2
0
def test_Instance_working_dir_shell_variable_expansion(abc_instance_config):
  """Test that shell variables are expanded in working_dir.

  The $FOO environment variable is set for the duration of the test and
  restored to its previous state afterwards, so that the change does not
  leak into other tests running in the same process.
  """
  working_dir = abc_instance_config.working_dir
  previous_foo = os.environ.get("FOO")
  os.environ["FOO"] = working_dir
  try:
    abc_instance_config.working_dir = "$FOO/"
    instance = clgen.Instance(abc_instance_config)
    assert str(instance.working_dir) == working_dir
  finally:
    # Undo the environment change even when the assertion fails.
    if previous_foo is None:
      os.environ.pop("FOO", None)
    else:
      os.environ["FOO"] = previous_foo
示例#3
0
def test_Instance_Session_no_working_dir(abc_instance_config):
    """Test that $CLGEN_CACHE is not changed when there's no working_dir.

    The value of $CLGEN_CACHE from before the test is restored afterwards so
    that the placeholder value does not leak into other tests.
    """
    previous_cache = os.environ.get('CLGEN_CACHE')
    abc_instance_config.ClearField('working_dir')
    os.environ['CLGEN_CACHE'] = 'foo'
    try:
        instance = clgen.Instance(abc_instance_config)
        with instance.Session():
            assert os.environ['CLGEN_CACHE'] == 'foo'
    finally:
        # Undo the environment change even when the assertion fails.
        if previous_cache is None:
            os.environ.pop('CLGEN_CACHE', None)
        else:
            os.environ['CLGEN_CACHE'] = previous_cache
示例#4
0
def test_Instance_Session_no_working_dir(abc_instance_config,
                                         tempdir2: pathlib.Path):
    """Test that $CLGEN_CACHE is not modified when config sets no working_dir.

    The value of $CLGEN_CACHE from before the test is restored afterwards so
    that other tests are not left with the variable set to this test's
    tempdir2 path.
    """
    previous_cache = os.environ.get('CLGEN_CACHE')
    abc_instance_config.ClearField('working_dir')
    os.environ['CLGEN_CACHE'] = str(tempdir2)
    try:
        instance = clgen.Instance(abc_instance_config)
        with instance.Session():
            assert os.environ['CLGEN_CACHE'] == str(tempdir2)
    finally:
        # Undo the environment change even when the assertion fails.
        if previous_cache is None:
            os.environ.pop('CLGEN_CACHE', None)
        else:
            os.environ['CLGEN_CACHE'] = previous_cache
示例#5
0
def test_main_stop_after_train(abc_instance_file):
    """Check that main() with --stop_after train leaves the model trained."""
    app.FLAGS.unparse_flags()
    flag_args = [
        'argv[0]', '--config', abc_instance_file, '--stop_after', 'train'
    ]
    app.FLAGS(flag_args)
    clgen.main([])
    # Reload the instance from the same config file to inspect its model.
    config = pbutil.FromFile(
        pathlib.Path(abc_instance_file), clgen_pb2.Instance())
    reloaded = clgen.Instance(config)
    assert reloaded.model.is_trained
示例#6
0
def test_main_stop_after_corpus(abc_instance_file):
    """Check that main() with --stop_after corpus leaves the model untrained."""
    app.FLAGS.unparse_flags()
    flag_args = [
        'argv[0]', '--config', abc_instance_file, '--stop_after', 'corpus'
    ]
    app.FLAGS(flag_args)
    clgen.main([])
    # Reload the instance from the same config file to inspect its model.
    config = pbutil.FromFile(
        pathlib.Path(abc_instance_file), clgen_pb2.Instance())
    reloaded = clgen.Instance(config)
    assert not reloaded.model.is_trained
示例#7
0
 def __init__(self, config: generator_pb2.ClgenGenerator):
     """Set up the CLgen instance and stamp its generator on each skeleton.

     Args:
         config: The generator config, forwarded to the parent constructor
             with no_init=True.

     Raises:
         ValueError: If the config has no testcase skeletons.
     """
     super(ClgenGenerator, self).__init__(config, no_init=True)
     self.instance = clgen.Instance(self.config.instance)
     self.toolchain = 'opencl'
     self.generator = ClgenInstanceToGenerator(self.instance)
     skeletons = self.config.testcase_skeleton
     if not skeletons:
         raise ValueError('No testcase skeletons provided')
     # Every skeleton gets a copy of this generator's description.
     for testcase_skeleton in skeletons:
         testcase_skeleton.generator.CopyFrom(self.generator)
示例#8
0
def test_main_stop_after_train(abc_instance_file):
  """Check that main() with stop_after set to "train" leaves the model trained."""
  # Reset the flags, then set the two this test needs directly.
  FLAGS.unparse_flags()
  FLAGS(["argv0"])
  FLAGS.config = abc_instance_file
  FLAGS.stop_after = "train"
  clgen.main()
  config_path = pathlib.Path(abc_instance_file)
  config = pbutil.FromFile(config_path, clgen_pb2.Instance())
  reloaded = clgen.Instance(config)
  assert reloaded.model.is_trained
示例#9
0
def test_main_stop_after_corpus(abc_instance_file):
  """Check that main() with stop_after set to "corpus" leaves the model untrained."""
  # Reset the flags, then set the two this test needs directly.
  FLAGS.unparse_flags()
  FLAGS(["argv0"])
  FLAGS.config = abc_instance_file
  FLAGS.stop_after = "corpus"
  clgen.main()
  config_path = pathlib.Path(abc_instance_file)
  config = pbutil.FromFile(config_path, clgen_pb2.Instance())
  reloaded = clgen.Instance(config)
  assert not reloaded.model.is_trained
示例#10
0
def main():
    """Build a CLgen instance config from flags, then train and sample it.

    The config is assembled by MakeClgenInstanceConfig() from the Java
    and network flags; the resulting instance and the samples database are
    handed to TrainAndSampleInstance().
    """
    instance_config = MakeClgenInstanceConfig(
        FLAGS.java_clgen_working_dir,
        FLAGS.java_encoded_contentfiles(),
        FLAGS.java_training_epochs,
        FLAGS.java_seed_text,
        FLAGS.neurons_per_layer,
        FLAGS.num_layers,
    )
    database = FLAGS.samples_db()
    instance = clgen.Instance(instance_config)
    TrainAndSampleInstance(instance, database)
示例#11
0
def test_config_is_valid():
    """Check that an Instance can be constructed from the c99 test config."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        config_path = bazelutil.DataPath(
            'phd/deeplearning/clgen/tests/data/c99/config.pbtxt')
        config = pbutil.FromFile(config_path, clgen_pb2.Instance())
        # Point the working directory and corpus path at our bazel run dir.
        config.working_dir = scratch_dir
        corpus_dir = bazelutil.DataPath(
            'phd/deeplearning/clgen/tests/data/c99/src/')
        config.model.corpus.local_directory = str(corpus_dir)
        clgen.Instance(config)
示例#12
0
def main():
    """Build a CLgen instance config seeded for OpenCL, then train and sample.

    The config comes from java.MakeClgenInstanceConfig() with an OpenCL
    seed text. Unless FLAGS.use_encoded_contentfiles_db is set, its corpus
    is replaced by the one from opencl.CreateCorpusProtoFromFlags().
    """
    opencl_seed_text = "kernel void A("  # OpenCL-specific seed text.
    instance_config = java.MakeClgenInstanceConfig(
        FLAGS.java_clgen_working_dir,
        FLAGS.java_encoded_contentfiles(),
        FLAGS.java_training_epochs,
        opencl_seed_text,
        FLAGS.neurons_per_layer,
        FLAGS.num_layers,
    )
    if not FLAGS.use_encoded_contentfiles_db:
        # Swap the Java corpus for an OpenCL one.
        opencl_corpus = opencl.CreateCorpusProtoFromFlags()
        instance_config.model.corpus.CopyFrom(opencl_corpus)
    database = FLAGS.samples_db()
    instance = clgen.Instance(instance_config)
    java.TrainAndSampleInstance(instance, database)
示例#13
0
def main(argv: typing.List[str]):
    """Build a fixed OpenCL CLgen instance and sample from it indefinitely.

    The instance config is assembled in code (corpus, network architecture,
    training options and sampler); only the working directory, corpus
    directory, database and profile directory come from flags. Once the
    profile directory exists, Sample() is called in an endless loop inside
    the instance session with a multiprocessing pool.

    Args:
        argv: Command line arguments. Anything beyond the first is rejected.

    Raises:
        app.UsageError: If more than one argument is given.
    """
    if len(argv) > 1:
        unknown = " ".join(argv[1:])
        raise app.UsageError("Unknown arguments: '{}'.".format(unknown))

    working_dir = FLAGS.clgen_dir

    # Preprocessors are listed in order; Compile appears both first-ish and
    # last, exactly as in the pipeline definition.
    preprocessors = [
        "deeplearning.clgen.preprocessors.opencl:ClangPreprocessWithShim",
        "deeplearning.clgen.preprocessors.opencl:Compile",
        "deeplearning.clgen.preprocessors.opencl:NormalizeIdentifiers",
        "deeplearning.clgen.preprocessors.opencl:StripDoubleUnderscorePrefixes",
        "deeplearning.clgen.preprocessors.common:StripDuplicateEmptyLines",
        "deeplearning.clgen.preprocessors.opencl:SanitizeKernelPrototype",
        "deeplearning.clgen.preprocessors.common:StripTrailingWhitespace",
        "deeplearning.clgen.preprocessors.opencl:ClangFormat",
        "deeplearning.clgen.preprocessors.common:MinimumLineCount3",
        "deeplearning.clgen.preprocessors.opencl:Compile",
    ]
    corpus = corpus_pb2.Corpus(
        local_directory=FLAGS.clgen_corpus_dir,
        ascii_character_atomizer=True,
        preprocessor=preprocessors,
        contentfile_separator="\n\n",
    )
    architecture = model_pb2.NetworkArchitecture(
        backend=model_pb2.NetworkArchitecture.TENSORFLOW,
        neuron_type=model_pb2.NetworkArchitecture.LSTM,
        neurons_per_layer=512,
        num_layers=2,
        post_layer_dropout_micros=0,
    )
    optimizer = model_pb2.AdamOptimizer(
        initial_learning_rate_micros=2000,
        learning_rate_decay_per_epoch_micros=50000,
        beta_1_micros=900000,
        beta_2_micros=999000,
        normalized_gradient_clip_micros=5000000,
    )
    training = model_pb2.TrainingOptions(
        num_epochs=50,
        sequence_length=64,
        batch_size=64,
        shuffle_corpus_contentfiles_between_epochs=True,
        adam_optimizer=optimizer,
    )
    model = model_pb2.Model(
        corpus=corpus,
        architecture=architecture,
        training=training,
    )

    # Two termination criteria: one on "{" / "}" token depth, one on a
    # maximum number of tokens in the sample.
    brace_depth_criterion = sampler_pb2.SampleTerminationCriterion(
        symtok=sampler_pb2.SymmetricalTokenDepth(
            depth_increase_token="{",
            depth_decrease_token="}",
        ))
    max_length_criterion = sampler_pb2.SampleTerminationCriterion(
        maxlen=sampler_pb2.MaxTokenLength(maximum_tokens_in_sample=20000))
    sampler = sampler_pb2.Sampler(
        start_text="kernel void ",
        batch_size=64,
        sequence_length=1024,
        temperature_micros=1000000,  # = 1.0 real value
        termination_criteria=[brace_depth_criterion, max_length_criterion],
    )

    instance_proto = clgen_pb2.Instance(
        working_dir=working_dir,
        model=model,
        sampler=sampler,
    )
    instance = clgen.Instance(instance_proto)

    db = grewe_features_db.Database(FLAGS.db)
    profile_dir = pathlib.Path(FLAGS.profile_dir)
    profile_dir.mkdir(parents=True, exist_ok=True)
    profiler = prof.AutoCsvProfiler(profile_dir)

    with instance.Session(), multiprocessing.Pool() as pool:
        # No exit condition: sampling continues until the process stops.
        while True:
            Sample(instance, db, profiler, pool)
示例#14
0
def GetInstances() -> typing.List[clgen.Instance]:
    """Return one CLgen instance per config in GetInstanceConfigs().instance."""
    configs = GetInstanceConfigs().instance
    return [clgen.Instance(config) for config in configs]
示例#15
0
def test_Instance_Session_yield_value(abc_instance_config):
  """Check that the value bound by `with Session() as ...` equals the instance."""
  clgen_instance = clgen.Instance(abc_instance_config)
  with clgen_instance.Session() as session_value:
    assert clgen_instance == session_value
示例#16
0
def test_Instance_ToProto_equality(abc_instance_config):
  """Check that ToProto() compares equal to the config the instance was built from."""
  clgen_instance = clgen.Instance(abc_instance_config)
  round_tripped = clgen_instance.ToProto()
  assert abc_instance_config == round_tripped
示例#17
0
def test_Instance_Session_clgen_dir(abc_instance_config):
  """Check that $CLGEN_CACHE equals the config's working_dir inside a session."""
  clgen_instance = clgen.Instance(abc_instance_config)
  with clgen_instance.Session():
    cache_dir = os.environ["CLGEN_CACHE"]
    assert cache_dir == abc_instance_config.working_dir
示例#18
0
def test_Instance_no_sampler_field(abc_instance_config):
  """Check that UserError is raised when model_specification is not set."""
  abc_instance_config.ClearField("model_specification")
  with test.Raises(errors.UserError) as raised:
    clgen.Instance(abc_instance_config)
  error_message = str(raised.value)
  assert "Field not set: 'Instance.model_specification'" == error_message
示例#19
0
def CreateInstanceFromFlags() -> clgen.Instance:
  """Return a CLgen instance built from CreateInstanceProtoFromFlags()."""
  instance_proto = CreateInstanceProtoFromFlags()
  return clgen.Instance(instance_proto)
示例#20
0
def test_Instance_no_working_dir_field(abc_instance_config):
  """Check that instance.working_dir is None once the config field is cleared."""
  abc_instance_config.ClearField("working_dir")
  clgen_instance = clgen.Instance(abc_instance_config)
  assert clgen_instance.working_dir is None
示例#21
0
def abc_instance(abc_instance_config: clgen_pb2.Instance):
    """Test fixture that returns an instance built from abc_instance_config."""
    instance = clgen.Instance(abc_instance_config)
    return instance