Example #1
class TrainR2BBFP(TrainFPResnet18):
    stage = Field(1)
    learning_rate: float = Field(1e-3)
    learning_rate_decay: float = Field(0.3)
    weight_decay_constant: float = Field(5e-6)
    epochs: int = Field(75)
    batch_size: int = Field(256)

    optimizer = Field(lambda self: tf.keras.optimizers.Adam(
        R2BStepSchedule(
            initial_learning_rate=self.learning_rate,
            steps_per_epoch=self.steps_per_epoch,
            decay_fraction=self.learning_rate_decay,
        )))

    x_offset: float = Field(0.0)
    use_unsign: bool = Field(False)
    use_hard_activation: bool = Field(False)
    use_scale_bconv: bool = Field(True)
    use_prelu_in_half_block: bool = Field(True)
    teacher_model = ComponentField(ResNet18FPFactory)
    initialize_teacher_weights_from = Field("resnet_fp")
    student_model = ComponentField(RealToBinNetFPFactory)

    classification_weight = Field(1.0)
    attention_matching_weight = Field(30.0)
    output_matching_weight = Field(3.0)

    attention_matching_volume_names = Field(
        lambda: [f"block_{b}_out" for b in range(2, 10)])
Example #2
class TrainR2BBAN(TrainR2BBFP):
    stage = Field(2)
    learning_rate: float = Field(1e-3)

    teacher_model = ComponentField(RealToBinNetFPFactory)
    student_model = ComponentField(RealToBinNetBANFactory)

    initialize_teacher_weights_from = Field("r2b_fp")
Example #3
class TrainR2B(MultiStageExperiment):
    model_modifier: str = Field("default")
    use_unsign: bool = Field(False)

    stage_0 = ComponentField(TrainFPResnet18)
    stage_1 = ComponentField(TrainR2BBFP)
    stage_2 = ComponentField(TrainR2BBAN)
    stage_3 = ComponentField(TrainR2BBNNAlternative)
Example #4
class TrainR2BBNN(TrainR2BBFP):
    stage = Field(3)
    learning_rate: float = Field(2e-4)

    classification_weight = Field(1.0)
    attention_matching_weight = Field(0.0)
    output_matching_weight = Field(0.8)
    output_matching_softmax_temperature = Field(1.0)

    teacher_model = ComponentField(RealToBinNetBANFactory)
    student_model = ComponentField(RealToBinNetBNNFactory)

    initialize_teacher_weights_from = Field("r2b_ban")
    initialize_student_weights_from = Field("r2b_ban")
Example #5
class TrainDoReFaNet(TrainLarqZooModel):
    model = ComponentField(DoReFaNetFactory)

    epochs = Field(90)
    batch_size = Field(256)

    learning_rate: float = Field(2e-4)
    decay_start: int = Field(60)
    decay_step_2: int = Field(75)
    fast_decay_start: int = Field(82)

    def learning_rate_schedule(self, epoch):
        if epoch < self.decay_start:
            return self.learning_rate
        elif epoch < self.decay_step_2:
            return self.learning_rate * 0.2
        elif epoch < self.fast_decay_start:
            return self.learning_rate * 0.2 * 0.2
        else:
            return (
                self.learning_rate
                * 0.2
                * 0.2
                * 0.1 ** ((epoch - self.fast_decay_start) // 2 + 1)
            )

    optimizer = Field(
        lambda self: tf.keras.optimizers.Adam(self.learning_rate, epsilon=1e-5)
    )
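
With the defaults above, this schedule returns 2e-4 before epoch 60, 4e-5 for epochs 60-74, 8e-6 for epochs 75-81, and from epoch 82 onward shrinks by a further factor of 10 every two epochs (8e-7 for epochs 82-83, 8e-8 for 84-85, and so on). The `optimizer` is constructed with the base rate; the schedule is presumably applied through a callback in `TrainLarqZooModel`.
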
Example #6
class TrainQuickNet(TrainLarqZooModel):
    model = ComponentField(QuickNetFactory)
    epochs = Field(600)
    batch_size = Field(2048)

    @Field
    def optimizer(self):
        binary_opt = tf.keras.optimizers.Adam(
            learning_rate=CosineDecayWithWarmup(
                max_learning_rate=1e-2,
                warmup_steps=self.steps_per_epoch * 5,
                decay_steps=self.steps_per_epoch * self.epochs,
            ))
        fp_opt = tf.keras.optimizers.SGD(
            learning_rate=CosineDecayWithWarmup(
                max_learning_rate=0.1,
                warmup_steps=self.steps_per_epoch * 5,
                decay_steps=self.steps_per_epoch * self.epochs,
            ),
            momentum=0.9,
        )
        return lq.optimizers.CaseOptimizer(
            (lq.optimizers.Bop.is_binary_variable, binary_opt),
            default_optimizer=fp_opt,
        )
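
`lq.optimizers.CaseOptimizer` assigns each trainable variable to the first optimizer whose predicate accepts it, and to `default_optimizer` otherwise: here the latent weights of binary layers (matched by `Bop.is_binary_variable`) are trained with Adam, while all remaining full-precision variables use SGD with momentum.
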
Example #7
class TrainXNORNet(TrainLarqZooModel):
    model = ComponentField(XNORNetFactory)

    epochs = Field(100)
    batch_size = Field(1200)

    initial_lr: float = Field(0.001)
    x_offset: float = Field(0.0)

    def learning_rate_schedule(self, epoch):
        epoch_dec_1 = 19
        epoch_dec_2 = 30
        epoch_dec_3 = 44
        epoch_dec_4 = 53
        epoch_dec_5 = 66
        epoch_dec_6 = 76
        epoch_dec_7 = 86
        if epoch < epoch_dec_1:
            return self.initial_lr
        elif epoch < epoch_dec_2:
            return self.initial_lr * 0.5
        elif epoch < epoch_dec_3:
            return self.initial_lr * 0.1
        elif epoch < epoch_dec_4:
            return self.initial_lr * 0.1 * 0.5
        elif epoch < epoch_dec_5:
            return self.initial_lr * 0.01
        elif epoch < epoch_dec_6:
            return self.initial_lr * 0.01 * 0.5
        elif epoch < epoch_dec_7:
            return self.initial_lr * 0.01 * 0.1
        else:
            return self.initial_lr * 0.001 * 0.1

    optimizer = Field(lambda self: tf.keras.optimizers.Adam(self.initial_lr))
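
Relative to `initial_lr`, the schedule above steps through the factors 1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001, and finally 1e-4.
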
Example #8
class TrainFPResnet18(LarqZooModelTrainingPhase):
    stage = Field(0)
    dataset = ComponentField(ImageNet)
    learning_rate: float = Field(1e-1)
    epochs: int = Field(100)
    batch_size: int = Field(512)
    # amount_of_images: int = Field(1281167)
    warmup_duration: int = Field(5)

    optimizer = Field(lambda self: tf.keras.optimizers.SGD(
        CosineDecayWithWarmup(
            max_learning_rate=self.learning_rate,
            warmup_steps=self.warmup_duration * self.steps_per_epoch,
            decay_steps=self.epochs * self.steps_per_epoch,
        )))

    student_model = ComponentField(ResNet18FPFactory)
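
`CosineDecayWithWarmup` is likewise not shown in these examples. A minimal sketch consistent with how it is called above (linear warmup to `max_learning_rate` over `warmup_steps`, followed by cosine decay towards zero at `decay_steps`) could look like this; the actual implementation may differ in detail.

import math

import tensorflow as tf


class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Linear warmup followed by cosine decay. A sketch inferred from the
    call sites in these examples, not the original implementation."""

    def __init__(self, max_learning_rate, warmup_steps, decay_steps):
        self.max_learning_rate = max_learning_rate
        self.warmup_steps = warmup_steps
        self.decay_steps = decay_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        # Ramp up linearly from zero to the maximum learning rate.
        warmup_lr = self.max_learning_rate * step / self.warmup_steps
        # Then follow a half-cosine from the maximum down to zero.
        progress = ((step - self.warmup_steps) /
                    (self.decay_steps - self.warmup_steps))
        cosine_lr = 0.5 * self.max_learning_rate * (1 + tf.cos(math.pi * progress))
        return tf.where(step < self.warmup_steps, warmup_lr, cosine_lr)
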
Example #9
class TrainBinaryAlexNet(TrainLarqZooModel):
    model = ComponentField(BinaryAlexNetFactory)

    batch_size: int = Field(512)
    epochs: int = Field(150)

    def learning_rate_schedule(self, epoch):
        return 1e-2 * 0.5**(epoch // 10)

    optimizer = Field(
        lambda self: tf.keras.optimizers.Adam(self.learning_rate_schedule(0)))
Example #10
class TrainR2BStrongBaselineBAN(LarqZooModelTrainingPhase):
    stage = Field(0)

    dataset = ComponentField(ImageNet)

    learning_rate: float = Field(1e-3)
    learning_rate_decay: float = Field(0.1)
    epochs: int = Field(75)
    batch_size: int = Field(8)
    # amount_of_images: int = Field(1281167)
    warmup_duration: int = Field(5)

    optimizer = Field(lambda self: tf.keras.optimizers.Adam(
        R2BStepSchedule(
            initial_learning_rate=self.learning_rate,
            steps_per_epoch=self.steps_per_epoch,
            decay_fraction=self.learning_rate_decay,
        )))

    student_model = ComponentField(StrongBaselineNetBANFactory)
Example #11
class BinaryNetMnist(Experiment):
    dataset = ComponentField(Mnist)
    input_shape: Tuple[int, int, int] = Field((28, 28, 1))
    preprocessing = ComponentField(PadCropAndFlip, pad_size=32)
    model: tf.keras.models.Model = ComponentField(BinaryNet)

    epochs = Field(100)
    batch_size = Field(128)
    learning_rate: float = Field(5e-3)

    loss = Field("sparse_categorical_crossentropy")
    metrics: Sequence[str] = Field(lambda: ["accuracy"])

    @Field
    def optimizer(self):
        return tf.keras.optimizers.Adam(self.learning_rate)

    def run(self):
        train_data, num_train_examples = self.dataset.train()
        train_data = (train_data.cache().shuffle(
            10 * self.batch_size).repeat().map(
                partial(self.preprocessing,
                        training=True)).batch(self.batch_size))
        validation_data, num_validation_examples = self.dataset.validation()
        validation_data = (validation_data.cache().repeat().map(
            self.preprocessing).batch(self.batch_size))

        self.model.compile(optimizer=self.optimizer,
                           loss=self.loss,
                           metrics=self.metrics)

        lq.models.summary(self.model)

        self.model.fit(
            train_data,
            epochs=self.epochs,
            steps_per_epoch=num_train_examples // self.batch_size,
            validation_data=validation_data,
            validation_steps=num_validation_examples // self.batch_size,
        )
Example #12
class TrainFPResnet18(LarqZooModelTrainingPhase):
    stage = Field(0)
    dataset = ComponentField(ImageNet)
    model_modifier: str = Field("default")
    learning_rate: float = Field(1e-3)
    weight_decay_constant: float = Field(1e-5)
    epochs: int = Field(100)
    batch_size: int = Field(512)
    # amount_of_images: int = Field(1281167)
    warmup_duration: int = Field(5)

    optimizer = Field(lambda self: tf.keras.optimizers.Adam(
        CosineDecayWithWarmup(
            max_learning_rate=self.learning_rate,
            warmup_steps=self.warmup_duration * self.steps_per_epoch,
            decay_steps=(self.epochs - self.warmup_duration) *
            self.steps_per_epoch,
        )))

    student_model = ComponentField(ResNet18FPFactory)
Example #13
class TrainR2BBFP(TrainFPResnet18):
    stage = Field(1)
    learning_rate: float = Field(1e-3)
    learning_rate_decay: float = Field(0.3)
    epochs: int = Field(75)
    batch_size: int = Field(256)

    optimizer = Field(lambda self: tf.keras.optimizers.Adam(
        R2BStepSchedule(
            initial_learning_rate=self.learning_rate,
            steps_per_epoch=self.steps_per_epoch,
            decay_fraction=self.learning_rate_decay,
        )))

    teacher_model = ComponentField(ResNet18FPFactory)
    initialize_teacher_weights_from = Field("resnet_fp")
    student_model = ComponentField(RealToBinNetFPFactory)

    classification_weight = Field(1.0)
    attention_matching_weight = Field(30.0)
    output_matching_weight = Field(3.0)

    attention_matching_volume_names = Field(
        lambda: [f"block_{b}_out" for b in range(2, 10)])
Example #14
class TrainBinaryDenseNet28(TrainLarqZooModel):
    model: BinaryDenseNet = ComponentField(BinaryDenseNet28Factory)

    epochs = Field(120)
    batch_size = Field(256)

    learning_rate: float = Field(4e-3)
    learning_factor: float = Field(0.1)
    learning_steps: Sequence[int] = Field((100, 110))

    def learning_rate_schedule(self, epoch):
        lr = self.learning_rate
        for step in self.learning_steps:
            if epoch < step:
                return lr
            lr *= self.learning_factor
        return lr

    optimizer = Field(lambda self: tf.keras.optimizers.Adam(self.learning_rate,
                                                            epsilon=1e-8))
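
With the defaults above, `learning_rate_schedule` yields 4e-3 before epoch 100, 4e-4 for epochs 100-109, and 4e-5 from epoch 110 onward.
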
Example #15
class TrainBiRealNet(TrainLarqZooModel):
    model = ComponentField(BiRealNetFactory)

    epochs = Field(300)
    batch_size = Field(512)

    learning_rate: float = Field(5e-3)
    decay_schedule: str = Field("linear")

    @Field
    def optimizer(self):
        if self.decay_schedule == "linear_cosine":
            lr = tf.keras.experimental.LinearCosineDecay(
                self.learning_rate, 750684)
        elif self.decay_schedule == "linear":
            lr = tf.keras.optimizers.schedules.PolynomialDecay(
                self.learning_rate, 750684, end_learning_rate=0, power=1.0)
        else:
            lr = self.learning_rate
        return tf.keras.optimizers.Adam(lr)
Example #16
class ModelFactory:
    """A base class for Larq Zoo models. Defines some common fields."""

    # Don't set any defaults here.
    input_quantizer: QuantizerType = Field()
    kernel_quantizer: QuantizerType = Field()
    kernel_constraint: ConstraintType = Field()

    # This field is included for automatic inference of `num_classes`, if no
    # value is otherwise provided. We set `allow_missing` because we don't want
    # to throw an error if a dataset is not provided, as long as `num_classes`
    # is overridden.
    dataset: Optional[Dataset] = ComponentField(allow_missing=True)

    @Field
    def num_classes(self) -> int:
        if self.dataset is None:
            raise TypeError(
                "No `dataset` is defined so unable to infer `num_classes`. Please "
                "provide a `dataset` or override `num_classes` directly.")
        return self.dataset.num_classes

    include_top: bool = Field(True)
    weights: Optional[str] = Field(None)

    input_shape: Optional[Tuple[DimType, DimType, DimType]] = Field(None)
    input_tensor: Optional[tf.Tensor] = Field(None)

    @property
    def image_input(self) -> tf.Tensor:
        if not hasattr(self, "_image_input"):
            input_shape = utils.validate_input(
                self.input_shape,
                self.weights,
                self.include_top,
                self.num_classes,
            )
            self._image_input = utils.get_input_layer(input_shape,
                                                      self.input_tensor)
        return self._image_input
Example #17
class TrainR2BStrongBaseline(MultiStageExperiment):
    stage_0 = ComponentField(TrainR2BStrongBaselineBAN)
    stage_1 = ComponentField(TrainR2BStrongBaselineBNN)
Example #18
class TrainR2BStrongBaselineBNN(TrainR2BStrongBaselineBAN):
    stage = Field(1)
    learning_rate: float = Field(2e-4)
    student_model = ComponentField(StrongBaselineNetBNNFactory)
    initialize_student_weights_from = Field("baseline_ban")
Example #19
class TrainR2B(MultiStageExperiment):
    stage_0 = ComponentField(TrainFPResnet18)
    stage_1 = ComponentField(TrainR2BBFP)
    stage_2 = ComponentField(TrainR2BBAN)
    stage_3 = ComponentField(TrainR2BBNNAlternative)
Example #20
class TrainBinaryDenseNet37(TrainBinaryDenseNet28):
    model = ComponentField(BinaryDenseNet37Factory)
    batch_size = Field(192)
Example #21
class TrainQuickNetSmall(TrainQuickNet):
    model = ComponentField(QuickNetSmallFactory)
Example #22
class MultiStageExperiment:
    """Allows running a series of `KnowledgeDistillationExperiment`s in sequence."""

    initial_stage: int = Field(0)

    # To add a new stage, also increment the hard-coded `5` in the `experiments`
    # definition below.
    stage_0: TrainingPhase = ComponentField(allow_missing=True)
    stage_1: TrainingPhase = ComponentField(allow_missing=True)
    stage_2: TrainingPhase = ComponentField(allow_missing=True)
    stage_3: TrainingPhase = ComponentField(allow_missing=True)
    stage_4: TrainingPhase = ComponentField(allow_missing=True)

    @property
    def experiments(self):
        for i in range(5):
            exp = getattr(self, f"stage_{i}", None)
            if exp:
                yield exp

    def __post_configure__(self):
        assert 0 <= self.initial_stage < 5

        # Check that all stages have the correct stage number (for setting
        # output directories, et cetera).
        for i, exp in enumerate(self.experiments):
            if exp is not None:
                assert exp.stage == i

        # Check that all stages being used are listed in sequence, without
        # `None` in between.
        for prev_exp, next_exp in zip(
                list(self.experiments)[self.initial_stage:],
                list(self.experiments)[self.initial_stage + 1:],
        ):
            if prev_exp is None:
                assert next_exp is None

    @Field
    def parent_output_dir(self) -> str:
        """Top level experiment directory shared by all sub-experiments.
        This directory will have the following structure:
        ```
        parent_output_dir/models/  # dir shared among all experiments in the sequence to store trained models
        parent_output_dir/stage_0/  # dir with artifacts (checkpoints, logs, tensorboards, ...) of stage 0
        ...
        parent_output_dir/stage_n/ # dir with artifacts (checkpoints, logs, tensorboards, ...) of stage n
        ```
        """
        return str(Path.home() / "tf/data/larq-zoo/zookeeper-logs" /
                   "knowledge_distillation" / self.__class__.__name__ /
                   datetime.now().strftime("%Y%m%d_%H%M%S"))

    @Field
    def model_dir(self) -> Path:
        """Directory shared by all sub-experiments where the models which have completed
        training are stored."""
        return Path(self.parent_output_dir) / "models"

    def run(self) -> None:
        Path(self.parent_output_dir).mkdir(parents=True, exist_ok=True)

        for experiment in self.experiments:
            if experiment.stage < self.initial_stage:
                continue
            print(
                f"Starting stage {experiment.stage} at {datetime.now().isoformat()}."
            )
            experiment.run()
Example #23
class TrainBinaryDenseNet45(TrainBinaryDenseNet28):
    model = ComponentField(BinaryDenseNet45Factory)
    epochs = Field(125)
    batch_size = Field(384)
    learning_rate = Field(0.008)
    learning_steps = Field((80, 100))
Example #24
class TrainBinaryDenseNet37Dilated(TrainBinaryDenseNet37):
    model = ComponentField(BinaryDenseNet37DilatedFactory)
    epochs = Field(80)
    batch_size = Field(256)
    learning_steps = Field((60, 70))
Example #25
class TeacherStudentModelFactory(ModelFactory):
    """Model that contains two sub-models; a teacher network and a student network. The
    teacher model should be pre-trained and its knowledge can be transferred to the
    student in two ways:

    - Attention matching: the student is encouraged to pay attention to the same spatial locations at intermediate
        layers in the network.
    - Output distribution matching: the student is trained to match the (optionally softened) predictions of
        the teacher.

    Besides this, the student is also trained on the standard classification loss. All three losses can be weighted.
    """

    teacher_model: tf.keras.models.Model = ComponentField(allow_missing=True)
    student_model: tf.keras.models.Model = ComponentField()

    # Must be set if there is a teacher and `allow_missing_teacher_weights` is not True.
    # Either a full path or the name of a network (in which case it will be sought in the current `model_dir`).
    initialize_teacher_weights_from: str = Field(allow_missing=True)
    # Explicitly allow missing teacher weights (this should be an explicit decision, not an accident).
    allow_missing_teacher_weights: bool = Field()
    # Optional: initialize the student weights from here if set.
    initialize_student_weights_from: str = Field(allow_missing=True)
    # Optionally picked up from the parent experiment; prepended to the arguments above
    # when only the name of a network is given rather than a full path.
    model_dir: str = Field(allow_missing=True)

    # parameters related to the standard cross-entropy training of the student on the target labels
    #  - weight on the loss component for standard classification
    classification_weight: float = Field()

    _classification_loss = None

    @property
    def classification_loss(self) -> tf.keras.losses.Loss:
        if self._classification_loss is None:
            self._classification_loss = WeightedCrossEntropyLoss(
                self.classification_weight)
        return self._classification_loss

    # parameters related to the training through attention matching between teacher and student activation volumes
    #  - weight on the loss component for spatial attention matching
    attention_matching_weight: float = Field()
    #  - list of partial names of the layers for which the outputs should be matched
    attention_matching_volume_names: Optional[List[str]] = Field(
        allow_missing=True)
    #  - optional separate list of partial names for the teacher. If not given, the names above will be used.
    attention_matching_volume_names_teacher: Optional[List[str]] = Field(
        allow_missing=True)
    #  - allow the teacher to be trained to better match activations with the student
    attention_matching_train_teacher: bool = Field()

    # parameters related to the training through the matching of the output predictions of the teacher and student
    #  - weight on the loss component for knowledge distillation
    output_matching_weight: float = Field()
    #  - temperature used for the softmax when matching distributions
    output_matching_softmax_temperature: float = Field()
    #  - allow the teacher to be trained during output distribution matching
    output_matching_train_teacher: bool = Field()

    def build(self) -> tf.keras.models.Model:
        def _load_submodel(sub_model: tf.keras.Model, path: str, name: str):
            if len(Path(path).parts) < 2:  # not a path but a network name
                path = str(Path(self.model_dir) / path)
            try:
                print(f"Loading {name} weights from {path}.")
                sub_model.load_weights(path)
            except tf.errors.InvalidArgumentError as e:
                raise ValueError(
                    f"Could not find {name} weights at {path}: the directory seems to be wrong"
                ) from e
            except tf.errors.NotFoundError as e:
                raise ValueError(
                    f"Could not find {name} weights at {path}: the checkpoint files seem to be missing"
                ) from e

        if hasattr(self, "teacher_model"):
            if hasattr(self, "initialize_teacher_weights_from"):
                _load_submodel(
                    self.teacher_model,
                    path=self.initialize_teacher_weights_from,
                    name="teacher",
                )
            elif not self.allow_missing_teacher_weights:
                raise ValueError(
                    "Teachers should probably know something, but no "
                    "`initialize_teacher_weights_from` was provided.")

        if hasattr(self, "initialize_student_weights_from"):
            _load_submodel(
                self.student_model,
                path=self.initialize_student_weights_from,
                name="student",
            )

        if not hasattr(self, "teacher_model"):
            assert (
                self.output_matching_weight == 0
                and self.attention_matching_weight == 0
            ), "No teacher set, but trying to use attention or distribution matching"
            # If there is no teacher model we do not need the teacher-student model
            # and can instead simply return the student model
            return self.student_model
        else:
            assert (
                self.output_matching_weight > 0
                or self.attention_matching_weight > 0
            ), "Teacher model loaded but neither attention matching nor output matching has a positive weight"

        assert (
            len(self.teacher_model.inputs) == 1
            and len(self.student_model.inputs) == 1
        ), ("TeacherStudentModelFactory expects the teacher and student models to have one input each, but received:"
            f"\n - a teacher with {len(self.teacher_model.inputs)} inputs and "
            f"\n - a student with {len(self.student_model.inputs)} inputs. ")

        # We take the output of the student and run it through some loss layers, which connects the
        # output to the teacher in the TF graph.
        combined_output = self.student_model.output

        if self.attention_matching_weight > 0:
            assert self.attention_matching_volume_names is not None
            attention_volume_names_teacher = (
                self.attention_matching_volume_names_teacher
                if hasattr(self, "attention_matching_volume_names_teacher")
                else self.attention_matching_volume_names)
            teacher_attention_volumes = [
                get_unique_layer_with_partial_name(self.teacher_model, name).output
                for name in attention_volume_names_teacher
            ]
            student_attention_volumes = [
                get_unique_layer_with_partial_name(self.student_model, name).output
                for name in self.attention_matching_volume_names
            ]

            combined_output = AttentionMatchingLossLayer(
                loss_weight=self.attention_matching_weight,
                propagate_teacher_gradients=self.attention_matching_train_teacher,
            )([
                combined_output,
                teacher_attention_volumes,
                student_attention_volumes,
            ])

        if self.output_matching_weight > 0:
            teacher_logits, student_logits = (
                get_unique_layer_with_partial_name(model, "logits").output
                for model in (self.teacher_model, self.student_model))
            combined_output = OutputDistributionMatchingLossLayer(
                loss_weight=self.output_matching_weight,
                softmax_temperature=self.output_matching_softmax_temperature,
                propagate_teacher_gradients=self.output_matching_train_teacher,
            )([combined_output, teacher_logits, student_logits])

        if (not self.attention_matching_train_teacher
                and not self.output_matching_train_teacher):
            for layer in self.teacher_model.layers:
                layer.trainable = False

        combined_model = tf.keras.models.Model(
            inputs=[*self.teacher_model.inputs, *self.student_model.inputs],
            outputs=combined_output,
            name="combined_model",
        )

        # The classification loss is added when the model is compiled, as it depends on the targets.
        # Return a model which takes a single input and passes it to both the teacher and the student.
        return tf.keras.Model(
            inputs=self.image_input,
            outputs=combined_model([self.image_input, self.image_input]),
            name="teacher_student_model",
        )
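
The `AttentionMatchingLossLayer` and `OutputDistributionMatchingLossLayer` used above are not included in these examples. To illustrate the first, below is a minimal sketch of activation-based attention matching (mean squared error between L2-normalized spatial attention maps, each map being the channel-wise sum of squared activations); the real layer will differ in details such as loss bookkeeping and metric reporting.

import tensorflow as tf


class AttentionMatchingLossLayer(tf.keras.layers.Layer):
    """Sketch of activation-based attention matching. Assumes channels-last
    activation volumes whose spatial shapes match between teacher and student."""

    def __init__(self, loss_weight, propagate_teacher_gradients=False, **kwargs):
        super().__init__(**kwargs)
        self.loss_weight = loss_weight
        self.propagate_teacher_gradients = propagate_teacher_gradients

    @staticmethod
    def _attention_map(volume):
        # Channel-wise sum of squared activations, flattened per example and
        # L2-normalized so that only the spatial distribution is compared.
        attention = tf.reduce_sum(tf.square(volume), axis=-1)
        attention = tf.reshape(attention, (tf.shape(attention)[0], -1))
        return tf.math.l2_normalize(attention, axis=-1)

    def call(self, inputs):
        output, teacher_volumes, student_volumes = inputs
        loss = 0.0
        for teacher, student in zip(teacher_volumes, student_volumes):
            if not self.propagate_teacher_gradients:
                teacher = tf.stop_gradient(teacher)
            loss += tf.reduce_mean(tf.square(
                self._attention_map(teacher) - self._attention_map(student)))
        self.add_loss(self.loss_weight * loss / len(teacher_volumes))
        # Pass the model output through unchanged; this layer only adds a loss.
        return output
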
Example #26
class BinaryNet:
    dataset: Dataset = ComponentField()
    preprocessing: Preprocessing = ComponentField()

    filters: int = Field(128)
    dense_units: int = Field(1024)
    kernel_size: Union[int, Tuple[int, int]] = Field((3, 3))

    input_shape: Tuple[int, int, int] = Field()

    def build(self) -> tf.keras.models.Model:
        kwhparams = dict(
            input_quantizer="ste_sign",
            kernel_quantizer="ste_sign",
            kernel_constraint="weight_clip",
            use_bias=False,
        )

        return tf.keras.models.Sequential([
            # Don't quantize inputs in first layer
            lq.layers.QuantConv2D(
                self.filters,
                self.kernel_size,
                kernel_quantizer="ste_sign",
                kernel_constraint="weight_clip",
                use_bias=False,
                input_shape=self.input_shape,
            ),
            tf.keras.layers.BatchNormalization(scale=False),
            lq.layers.QuantConv2D(self.filters,
                                  self.kernel_size,
                                  padding="same",
                                  **kwhparams),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
            tf.keras.layers.BatchNormalization(scale=False),
            lq.layers.QuantConv2D(2 * self.filters,
                                  self.kernel_size,
                                  padding="same",
                                  **kwhparams),
            tf.keras.layers.BatchNormalization(scale=False),
            lq.layers.QuantConv2D(2 * self.filters,
                                  self.kernel_size,
                                  padding="same",
                                  **kwhparams),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
            tf.keras.layers.BatchNormalization(scale=False),
            lq.layers.QuantConv2D(4 * self.filters,
                                  self.kernel_size,
                                  padding="same",
                                  **kwhparams),
            tf.keras.layers.BatchNormalization(scale=False),
            lq.layers.QuantConv2D(4 * self.filters,
                                  self.kernel_size,
                                  padding="same",
                                  **kwhparams),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
            tf.keras.layers.BatchNormalization(scale=False),
            tf.keras.layers.Flatten(),
            lq.layers.QuantDense(self.dense_units, **kwhparams),
            tf.keras.layers.BatchNormalization(scale=False),
            lq.layers.QuantDense(self.dense_units, **kwhparams),
            tf.keras.layers.BatchNormalization(scale=False),
            lq.layers.QuantDense(self.dataset.num_classes, **kwhparams),
            tf.keras.layers.BatchNormalization(scale=False),
            tf.keras.layers.Activation("softmax"),
        ])
Example #27
class TrainQuickNetLarge(TrainQuickNet):
    model = ComponentField(QuickNetLargeFactory)
Example #28
class TrainingPhase(TrainLarqZooModel):
    """Class used in multi-stage experiments with teacher-student knowledge distillation.

    This class makes it easy to run teacher-student knowledge distillation experiments
    as part of a sequence. After running the experiment, the student model weights will
    be saved.
    """

    # stage as part of a sequence of experiments, starting at 0
    stage: int = Field()

    model = ComponentField(TeacherStudentModelFactory)
    teacher_model: tf.keras.models.Model = ComponentField(allow_missing=True)
    # Can't really be missing but might be set on the teacher-student model directly
    student_model: tf.keras.models.Model = ComponentField(allow_missing=True)

    # Must be set if there is a teacher and allow_missing teacher weights is not True.
    # Either a full path or the name of a network (in which case it will be sought in the current `model_dir`).
    initialize_teacher_weights_from: str = Field(allow_missing=True)
    # Explicitly allow missing teacher weights (this should be an explicit decision, not an accident).
    allow_missing_teacher_weights: bool = Field(False)
    # Optional: initialize the student weights from here if set.
    initialize_student_weights_from: str = Field(allow_missing=True)

    # This will be inherited from the parent `MultiStageExperiment`.
    parent_output_dir: str = Field(allow_missing=True)

    @Field
    def output_dir(self) -> Path:
        """Main experiment output directory.

        In this directory, the training checkpoints, logs, and tensorboard files will be
        stored for this (sub-)experiment.
        """

        # When running as part of a `MultiStageExperiment`, the outputs of this `stage`
        # of the experiment are stored in a subdirectory named after the current stage index.
        if hasattr(self, "parent_output_dir"):
            return Path(self.parent_output_dir) / f"stage_{self.stage}"

        return (Path.home() / "tf/data/larq-zoo/zookeeper-logs" /
                self.dataset.__class__.__name__ / self.__class__.__name__ /
                datetime.now().strftime("%Y%m%d_%H%M%S"))

    @Field
    def model_dir(self) -> str:
        """The directory in which trained models are stored.

        When running as part of a sequence, this directory is shared among all stages
        such that the results of earlier stages can be used in later stages.
        """
        if hasattr(self, "parent_output_dir"):
            base = Path(self.parent_output_dir)
        else:
            base = Path(self.output_dir)
        return str(base / "models")

    def run(self):
        super().run()
        student = self.__base_getattribute__("model").student_model
        # We specifically save the student at the end of training, as this sub-model
        # is what the training procedure is meant to optimize.
        student.save_weights(str(Path(self.model_dir) / student.name))