class ProjectAggregateCombinerConfig(BaseCombinerConfig):
    projection_size: int = schema_utils.PositiveInteger(
        default=128,
        description="All combiner inputs are projected to this size before being aggregated.",
    )
    fc_layers: Optional[List[Dict[str, Any]]] = schema_utils.DictList(
        description="Full specification of the fully connected layers after the aggregation. "
        "It should be a list of dicts, each dict representing one layer."
    )
    num_fc_layers: int = schema_utils.NonNegativeInteger(
        default=2, description="Number of fully connected layers after aggregation."
    )
    output_size: int = schema_utils.PositiveInteger(
        default=128, description="Output size of each layer of the stack of fully connected layers."
    )
    use_bias: bool = schema_utils.Boolean(
        default=True, description="Whether the layers use a bias vector."
    )
    weights_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="xavier_uniform",
        description="Initializer to use for the weights of the projection and for the fully connected layers.",
    )
    bias_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="zeros",
        description="Initializer to use for the bias of the projection and for the fully connected layers.",
    )
    norm: Optional[str] = schema_utils.StringOptions(
        ["batch", "layer"],
        default="layer",
        description="Normalization to apply to each projection and fully connected layer.",
    )
    norm_params: Optional[dict] = schema_utils.Dict(
        description="Parameters of the normalization to apply to each projection and fully connected layer."
    )
    activation: str = schema_utils.ActivationOptions(
        default="relu", description="Activation to apply to each fully connected layer."
    )
    dropout: float = schema_utils.FloatRange(
        default=0.0, min=0, max=1, description="Dropout rate to apply to each fully connected layer."
    )
    residual: bool = schema_utils.Boolean(
        default=True,
        description="Whether to add a residual skip connection between the fully connected layers in the stack.",
    )
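# A minimal usage sketch (not part of the config definition): assuming this dataclass is
# backed by a generated marshmallow schema exposed as `.Schema()`, the same pattern the
# InitializerOrDict test below uses for its own test schema, a partial user dict could be
# validated and filled with the declared defaults roughly like this. Values are illustrative.
config = ProjectAggregateCombinerConfig.Schema().load(
    {
        "projection_size": 64,  # override the default of 128
        "norm": "batch",        # must be one of ["batch", "layer"]
        "dropout": 0.2,         # must lie in [0, 1]
    }
)
assert config.num_fc_layers == 2  # untouched fields keep their declared defaults
assert config.residual is True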
class ConcatCombinerConfig(BaseCombinerConfig):
    """Parameters for concat combiner."""

    fc_layers: Optional[List[Dict[str, Any]]] = schema_utils.DictList(description="")
    num_fc_layers: int = schema_utils.NonNegativeInteger(default=0, description="")
    output_size: int = schema_utils.PositiveInteger(
        default=256, description="Output size of a fully connected layer."
    )
    use_bias: bool = schema_utils.Boolean(
        default=True, description="Whether the layer uses a bias vector."
    )
    weights_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="xavier_uniform", description=""
    )
    bias_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(default="zeros", description="")
    norm: Optional[str] = schema_utils.StringOptions(["batch", "layer"], description="")
    norm_params: Optional[dict] = schema_utils.Dict(description="")
    activation: str = schema_utils.ActivationOptions(default="relu", description="")
    dropout: float = schema_utils.FloatRange(default=0.0, min=0, max=1, description="")
    flatten_inputs: bool = schema_utils.Boolean(
        default=False, description="Whether to flatten input tensors to a vector."
    )
    residual: bool = schema_utils.Boolean(
        default=False,
        description=(
            "Whether to add a residual connection to each fully connected layer block. All fully connected layers"
            " must have the same size."
        ),
    )
class ComparatorCombinerConfig(BaseCombinerConfig):
    """Parameters for comparator combiner."""

    entity_1: List[str]
    """TODO: Document parameters."""

    entity_2: List[str]
    """TODO: Document parameters."""

    fc_layers: Optional[List[Dict[str, Any]]] = schema_utils.DictList(description="")
    num_fc_layers: int = schema_utils.NonNegativeInteger(default=1, description="")
    output_size: int = schema_utils.PositiveInteger(
        default=256, description="Output size of a fully connected layer."
    )
    use_bias: bool = schema_utils.Boolean(
        default=True, description="Whether the layer uses a bias vector."
    )
    weights_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="xavier_uniform", description=""
    )
    bias_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(default="zeros", description="")
    norm: Optional[str] = schema_utils.StringOptions(["batch", "layer"], description="")
    norm_params: Optional[dict] = schema_utils.Dict(description="")
    activation: str = schema_utils.ActivationOptions(default="relu", description="")
    dropout: float = schema_utils.FloatRange(
        default=0.0, min=0, max=1, description="Dropout rate for the fully connected layers."
    )
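# A hedged example of how a comparator combiner config might be populated: `entity_1` and
# `entity_2` are declared without defaults, so under the same generated-schema assumption as
# above they would have to be supplied explicitly. The feature names below are hypothetical.
comparator_config = ComparatorCombinerConfig.Schema().load(
    {
        "entity_1": ["user_name", "user_description"],  # features forming the first entity
        "entity_2": ["item_name", "item_description"],  # features forming the second entity
        "num_fc_layers": 2,                             # override the default of 1
    }
)
assert comparator_config.output_size == 256             # default retained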
def test_InitializerOrDict():
    # Test metadata matches expected defaults after field creation (null allowed):
    default_initializerordict = get_marshmallow_from_dataclass_field(schema_utils.InitializerOrDict())
    assert default_initializerordict.default == "xavier_uniform"

    initializerordict = get_marshmallow_from_dataclass_field(schema_utils.InitializerOrDict("zeros"))
    assert initializerordict.default == "zeros"

    # Test default value validation:
    with pytest.raises(MarshmallowValidationError):
        schema_utils.InitializerOrDict("test")

    # Test simple schema creation:
    @dataclass
    class CustomTestSchema(schema_utils.BaseMarshmallowConfig):
        foo: Union[None, str, Dict] = schema_utils.InitializerOrDict()

    # Test invalid non-dict loads:
    with pytest.raises(MarshmallowValidationError):
        CustomTestSchema.Schema().load({"foo": 1})
    with pytest.raises(MarshmallowValidationError):
        CustomTestSchema.Schema().load({"foo": "test"})

    # Test valid loads:
    assert CustomTestSchema.Schema().load({}).foo == "xavier_uniform"
    assert CustomTestSchema.Schema().load({"foo": "zeros"}).foo == "zeros"

    # Test invalid dict loads:
    with pytest.raises(MarshmallowValidationError):
        CustomTestSchema.Schema().load({"foo": None})
    with pytest.raises(MarshmallowValidationError):
        CustomTestSchema.Schema().load({"foo": {"a": "b"}})
    with pytest.raises(MarshmallowValidationError):
        CustomTestSchema.Schema().load({"foo": {"type": "invalid"}})

    # Test valid dict loads:
    assert CustomTestSchema.Schema().load({"foo": {"type": "zeros"}}).foo == {"type": "zeros"}
class CommonTransformerConfig:
    """Common transformer parameter values."""

    num_layers: int = schema_utils.PositiveInteger(default=1, description="")
    hidden_size: int = schema_utils.NonNegativeInteger(
        default=256,
        description=(
            "The number of hidden units of the TransformerStack as well as the dimension that each incoming input "
            "feature is projected to before feeding to the TransformerStack."
        ),
    )
    num_heads: int = schema_utils.NonNegativeInteger(
        default=8, description="Number of heads of the self attention in the transformer block."
    )
    transformer_output_size: int = schema_utils.NonNegativeInteger(
        default=256,
        description=(
            "Size of the fully connected layer after self attention in the transformer block. This is usually the "
            "same as `hidden_size` and `embedding_size`."
        ),
    )
    dropout: float = schema_utils.FloatRange(
        default=0.1, min=0, max=1, description="Dropout rate for the transformer block."
    )
    fc_layers: Optional[List[Dict[str, Any]]] = schema_utils.DictList(description="")

    # TODO(#1673): Add conditional logic for fields like this one:
    num_fc_layers: int = schema_utils.NonNegativeInteger(
        default=0,
        description="The number of stacked fully connected layers (only applies if `reduce_output` is not null).",
    )
    output_size: int = schema_utils.PositiveInteger(
        default=256, description="Output size of a fully connected layer."
    )
    use_bias: bool = schema_utils.Boolean(
        default=True, description="Whether the layer uses a bias vector."
    )
    weights_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="xavier_uniform", description=""
    )
    bias_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(default="zeros", description="")
    norm: Optional[str] = schema_utils.StringOptions(["batch", "layer"], description="")
    norm_params: Optional[dict] = schema_utils.Dict(description="")
    fc_activation: str = schema_utils.ActivationOptions(default="relu", description="")
    fc_dropout: float = schema_utils.FloatRange(default=0.0, min=0, max=1, description="")
    fc_residual: bool = schema_utils.Boolean(default=False, description="")
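# CommonTransformerConfig is written as a mixin of shared transformer parameters rather than a
# combiner config on its own. A hypothetical sketch of how a concrete transformer-based combiner
# config could reuse it: the class name, the `reduce_output` field, and its option values below
# are illustrative assumptions, not taken from the source; only the mixin pattern and the
# schema_utils helpers used here appear in the classes above.
@dataclass
class MyTransformerCombinerConfig(BaseCombinerConfig, CommonTransformerConfig):
    """Combines the base combiner fields with the shared transformer fields defined above."""

    reduce_output: Optional[str] = schema_utils.StringOptions(
        ["mean", "sum", "concat"],
        default="mean",
        description="Strategy for reducing the transformer outputs before the fully connected stack.",
    )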