def __init__(
    self,
    num_classes: int,
    backbone: str = "prajjwal1/bert-medium",
    loss_fn: Optional[Callable] = None,
    optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam,
    metrics: Union[Callable, Mapping, Sequence, None] = None,
    learning_rate: float = 1e-2,
    multi_label: bool = False,
    serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None,
):
    if not _TEXT_AVAILABLE:
        raise ModuleNotFoundError("Please, pip install 'lightning-flash[text]'")

    self.save_hyperparameters()

    os.environ["TOKENIZERS_PARALLELISM"] = "TRUE"
    # disable HF thousand warnings
    warnings.simplefilter("ignore")
    # set os environ variable for multiprocesses
    os.environ["PYTHONWARNINGS"] = "ignore"

    super().__init__(
        model=None,
        loss_fn=loss_fn,
        optimizer=optimizer,
        metrics=metrics,
        learning_rate=learning_rate,
        multi_label=multi_label,
        serializer=serializer or Labels(multi_label=multi_label),
    )
    self.model = BertForSequenceClassification.from_pretrained(backbone, num_labels=num_classes)
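A hedged construction sketch (not from the source; `num_classes=2` is illustrative): the task can be instantiated with just the number of classes, falling back to the default backbone.

import flash
from flash.text import TextClassifier

# Minimal sketch, assuming the constructor above: a binary classifier
# built on the default "prajjwal1/bert-medium" backbone.
model = TextClassifier(num_classes=2)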
def test_saving_with_serializers(tmpdir):

    checkpoint_file = os.path.join(tmpdir, 'tmp.ckpt')

    class CustomModel(Task):

        def __init__(self):
            super().__init__(model=torch.nn.Linear(1, 1), loss_fn=torch.nn.MSELoss())

    serializer = Labels(["a", "b"])
    model = CustomModel()
    trainer = Trainer(fast_dev_run=True)

    # attach a pipeline whose serializer carries the class labels
    data_pipeline = DataPipeline(DefaultPreprocess(), serializer=serializer)
    data_pipeline.initialize()
    model.data_pipeline = data_pipeline
    assert isinstance(model.preprocess, DefaultPreprocess)

    dummy_data = DataLoader(list(zip(torch.arange(10, dtype=torch.float), torch.arange(10, dtype=torch.float))))
    trainer.fit(model, train_dataloader=dummy_data)
    trainer.save_checkpoint(checkpoint_file)

    # the labels set on the serializer survive the checkpoint round-trip
    model = CustomModel.load_from_checkpoint(checkpoint_file)
    assert isinstance(model.preprocess._data_pipeline_state, DataPipelineState)
    assert model.preprocess._data_pipeline_state._state[ClassificationState] == ClassificationState(['a', 'b'])
def test_classification_serializers():

    example_output = torch.tensor([-0.1, 0.2, 0.3])  # 3 classes
    labels = ['class_1', 'class_2', 'class_3']

    assert torch.allclose(torch.tensor(Logits().serialize(example_output)), example_output)
    assert torch.allclose(torch.tensor(Probabilities().serialize(example_output)), torch.softmax(example_output, -1))
    assert Classes().serialize(example_output) == 2
    assert Labels(labels).serialize(example_output) == 'class_3'
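For context, a hedged usage sketch of the same serializers outside the test (the label names are illustrative): each serializer maps the same logits tensor to a different output format.

import torch

from flash.core.classification import Classes, Labels, Probabilities

# Illustrative logits for a 3-class problem (values are made up).
logits = torch.tensor([-0.1, 0.2, 0.3])

print(Probabilities().serialize(logits))  # softmax over the three classes
print(Classes().serialize(logits))        # argmax -> 2
print(Labels(['cat', 'dog', 'bird']).serialize(logits))  # -> 'bird'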
def __init__(
    self,
    num_classes: int,
    backbone: Union[str, nn.Module] = "x3d_xs",
    backbone_kwargs: Optional[Dict] = None,
    pretrained: bool = True,
    loss_fn: Callable = F.cross_entropy,
    optimizer: Type[torch.optim.Optimizer] = torch.optim.SGD,
    optimizer_kwargs: Optional[Dict[str, Any]] = None,
    scheduler: Optional[Union[Type[_LRScheduler], str, _LRScheduler]] = None,
    scheduler_kwargs: Optional[Dict[str, Any]] = None,
    metrics: Union[Metric, Callable, Mapping, Sequence, None] = Accuracy(),
    learning_rate: float = 1e-3,
    head: Optional[Union[FunctionType, nn.Module]] = None,
    serializer: Optional[Serializer] = None,
):
    super().__init__(
        model=None,
        loss_fn=loss_fn,
        optimizer=optimizer,
        optimizer_kwargs=optimizer_kwargs,
        scheduler=scheduler,
        scheduler_kwargs=scheduler_kwargs,
        metrics=metrics,
        learning_rate=learning_rate,
        serializer=serializer or Labels(),
    )

    self.save_hyperparameters()

    if not backbone_kwargs:
        backbone_kwargs = {}

    backbone_kwargs["pretrained"] = True if (flash._IS_TESTING and torch.cuda.is_available()) else pretrained
    backbone_kwargs["head_activation"] = None

    if isinstance(backbone, nn.Module):
        self.backbone = backbone
    elif isinstance(backbone, str):
        self.backbone = self.backbones.get(backbone)(**backbone_kwargs)
        num_features = self.backbone.blocks[-1].proj.out_features
    else:
        raise MisconfigurationException(f"backbone should be either a string or a nn.Module. Found: {backbone}")

    self.head = head or nn.Sequential(
        nn.Flatten(),
        nn.Linear(num_features, num_classes),
    )
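A hedged instantiation sketch under the constructor above (the `flash.video` import path and `num_classes=5` are assumptions for illustration; `pretrained=False` just avoids a weight download and requires the video extras to be installed):

from flash.video import VideoClassifier

# Sketch: the default "x3d_xs" backbone with the default Flatten + Linear head.
model = VideoClassifier(num_classes=5, pretrained=False)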
def __init__(
    self,
    num_classes: int,
    backbone: Union[str, Tuple[nn.Module, int]] = "resnet18",
    backbone_kwargs: Optional[Dict] = None,
    head: Optional[Union[FunctionType, nn.Module]] = None,
    pretrained: Union[bool, str] = True,
    loss_fn: Optional[Callable] = None,
    optimizer: Union[Type[torch.optim.Optimizer], torch.optim.Optimizer] = torch.optim.Adam,
    optimizer_kwargs: Optional[Dict[str, Any]] = None,
    scheduler: Optional[Union[Type[_LRScheduler], str, _LRScheduler]] = None,
    scheduler_kwargs: Optional[Dict[str, Any]] = None,
    metrics: Union[Metric, Callable, Mapping, Sequence, None] = None,
    learning_rate: float = 1e-3,
    multi_label: bool = False,
    serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None,
):
    super().__init__(
        num_classes=num_classes,
        model=None,
        loss_fn=loss_fn,
        optimizer=optimizer,
        optimizer_kwargs=optimizer_kwargs,
        scheduler=scheduler,
        scheduler_kwargs=scheduler_kwargs,
        metrics=metrics,
        learning_rate=learning_rate,
        multi_label=multi_label,
        serializer=serializer or Labels(multi_label=multi_label),
    )

    self.save_hyperparameters()

    if not backbone_kwargs:
        backbone_kwargs = {}

    if isinstance(backbone, tuple):
        self.backbone, num_features = backbone
    else:
        self.backbone, num_features = self.backbones.get(backbone)(pretrained=pretrained, **backbone_kwargs)

    head = head(num_features, num_classes) if isinstance(head, FunctionType) else head
    self.head = head or nn.Sequential(nn.Linear(num_features, num_classes))
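A hedged sketch of the `(module, num_features)` backbone path handled by the `isinstance(backbone, tuple)` branch above (the `flash.image` import path and the toy CNN are assumptions for illustration):

import torch.nn as nn

from flash.image import ImageClassifier

# Sketch: a custom backbone passed as a (module, num_features) tuple.
# The tiny CNN here is a toy; any module producing 16 features would do.
backbone = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3),
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
)
model = ImageClassifier(num_classes=10, backbone=(backbone, 16))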
def test_classification_serializers_multi_label():

    example_output = torch.tensor([-0.1, 0.2, 0.3])  # 3 classes
    labels = ['class_1', 'class_2', 'class_3']

    assert torch.allclose(torch.tensor(Logits(multi_label=True).serialize(example_output)), example_output)
    assert torch.allclose(
        torch.tensor(Probabilities(multi_label=True).serialize(example_output)),
        torch.sigmoid(example_output),
    )
    assert Classes(multi_label=True).serialize(example_output) == [1, 2]
    assert Labels(labels, multi_label=True).serialize(example_output) == ['class_2', 'class_3']
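A hedged sketch of the thresholding behind those assertions (logit values are the same illustrative ones as the test; the 0.5 default threshold is inferred from the expected `[1, 2]` result):

import torch

from flash.core.classification import Classes, Labels

logits = torch.tensor([-0.1, 0.2, 0.3])  # sigmoid -> ~[0.48, 0.55, 0.57]

# With the default 0.5 threshold only classes 1 and 2 clear the bar.
print(Classes(multi_label=True).serialize(logits))  # [1, 2]

# A lower threshold (as in the movie-poster example below) keeps all three.
print(Labels(['a', 'b', 'c'], multi_label=True, threshold=0.25).serialize(logits))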
def __init__(
    self,
    num_features: int,
    num_classes: int,
    backbone: Union[str, Tuple[nn.Module, int]] = "mlp-128",
    backbone_kwargs: Optional[Dict] = None,
    loss_fn: Optional[Callable] = None,
    optimizer: Union[Type[torch.optim.Optimizer], torch.optim.Optimizer] = torch.optim.Adam,
    optimizer_kwargs: Optional[Dict[str, Any]] = None,
    scheduler: Optional[Union[Type[_LRScheduler], str, _LRScheduler]] = None,
    scheduler_kwargs: Optional[Dict[str, Any]] = None,
    metrics: Union[torchmetrics.Metric, Mapping, Sequence, None] = None,
    learning_rate: float = 1e-2,
    multi_label: bool = False,
    serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None,
):
    super().__init__(
        model=None,
        loss_fn=loss_fn,
        optimizer=optimizer,
        optimizer_kwargs=optimizer_kwargs,
        scheduler=scheduler,
        scheduler_kwargs=scheduler_kwargs,
        metrics=metrics,
        learning_rate=learning_rate,
        multi_label=multi_label,
        serializer=serializer or Labels(),
    )

    self.save_hyperparameters()

    if not backbone_kwargs:
        backbone_kwargs = {}

    if isinstance(backbone, tuple):
        self.backbone, out_features = backbone
    else:
        self.backbone, out_features = self.backbones.get(backbone)(num_features=num_features, **backbone_kwargs)

    self.head = nn.Linear(out_features, num_classes)
def __init__(
    self,
    num_classes: int,
    backbone: str = "prajjwal1/bert-medium",
    loss_fn: Optional[Callable] = None,
    optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam,
    optimizer_kwargs: Optional[Dict[str, Any]] = None,
    scheduler: Optional[Union[Type[_LRScheduler], str, _LRScheduler]] = None,
    scheduler_kwargs: Optional[Dict[str, Any]] = None,
    metrics: Union[Metric, Callable, Mapping, Sequence, None] = None,
    learning_rate: float = 1e-2,
    multi_label: bool = False,
    serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None,
    enable_ort: bool = False,
):
    self.save_hyperparameters()

    os.environ["TOKENIZERS_PARALLELISM"] = "TRUE"
    # disable HF thousand warnings
    warnings.simplefilter("ignore")
    # set os environ variable for multiprocesses
    os.environ["PYTHONWARNINGS"] = "ignore"

    super().__init__(
        num_classes=num_classes,
        model=None,
        loss_fn=loss_fn,
        optimizer=optimizer,
        optimizer_kwargs=optimizer_kwargs,
        scheduler=scheduler,
        scheduler_kwargs=scheduler_kwargs,
        metrics=metrics,
        learning_rate=learning_rate,
        multi_label=multi_label,
        serializer=serializer or Labels(multi_label=multi_label),
    )
    self.enable_ort = enable_ort
    self.model = self.backbones.get(backbone)(num_labels=num_classes)
dataset_dir="data/hymenoptera_data/test/", dataset_type=fo.types.ImageClassificationDirectoryTree, ) # 3 Load FiftyOne datasets datamodule = ImageClassificationData.from_fiftyone( train_dataset=train_dataset, val_dataset=val_dataset, test_dataset=test_dataset, ) # 4 Fine tune a model model = ImageClassifier( backbone="resnet18", num_classes=datamodule.num_classes, serializer=Labels(), ) trainer = flash.Trainer( max_epochs=1, limit_train_batches=1, limit_val_batches=1, ) trainer.finetune( model, datamodule=datamodule, strategy=FreezeUnfreeze(unfreeze_epoch=1), ) trainer.save_checkpoint("image_classification_model.pt") # 5 Predict from checkpoint on data with ground truth model = ImageClassifier.load_from_checkpoint("https://flash-weights.s3.amazonaws.com/image_classification_model.pt")
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pytorch_lightning import Trainer

from flash.core.classification import Labels
from flash.core.data.utils import download_data
from flash.text import TextClassificationData, TextClassifier

# 1. Download the data
download_data("https://pl-flash-data.s3.amazonaws.com/imdb.zip", "data/")

# 2. Load the model from a checkpoint
model = TextClassifier.load_from_checkpoint("https://flash-weights.s3.amazonaws.com/text_classification_model.pt")
model.serializer = Labels()

# 2a. Classify a few sentences! How was the movie?
predictions = model.predict([
    "Turgid dialogue, feeble characterization - Harvey Keitel a judge?.",
    "The worst movie in the history of cinema.",
    "I come from Bulgaria where it 's almost impossible to have a tornado.",
    "Very, very afraid.",
    "This guy has done a great job with this movie!",
])
print(predictions)

# 2b. Or generate predictions from a CSV file!
datamodule = TextClassificationData.from_csv(
    "review",
    predict_file="data/imdb/predict.csv",
)

# 3. Build the model
model = ImageClassifier(
    backbone="resnet18",
    num_classes=len(genres),
    multi_label=True,
    metrics=F1(num_classes=len(genres)),
)

# 4. Create the trainer. Train for 10 epochs.
trainer = flash.Trainer(max_epochs=10)

# 5. Train the model
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 6. Predict what's on a few images!
# Serialize predictions as labels, with a low threshold to see more predictions.
model.serializer = Labels(genres, multi_label=True, threshold=0.25)

predictions = model.predict([
    "data/movie_posters/predict/tt0085318.jpg",
    "data/movie_posters/predict/tt0089461.jpg",
    "data/movie_posters/predict/tt0097179.jpg",
])
print(predictions)

# 7. Save it!
trainer.save_checkpoint("image_classification_multi_label_model.pt")
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from flash.core.classification import Labels
from flash.core.data.utils import download_data
from flash.tabular import TabularClassifier

# 1. Download the data
download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "data/")

# 2. Load the model from a checkpoint
model = TabularClassifier.load_from_checkpoint("https://flash-weights.s3.amazonaws.com/tabular_classification_model.pt")

model.serializer = Labels(['Did not survive', 'Survived'])

# 3. Generate predictions from a CSV file! Who would survive?
predictions = model.predict("data/titanic/titanic.csv")
print(predictions)
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from flash.core.classification import Labels
from flash.tabular import TabularClassifier

model = TabularClassifier.load_from_checkpoint("https://flash-weights.s3.amazonaws.com/tabular_classification_model.pt")
model.serializer = Labels(["Did not survive", "Survived"])
model.serve()
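`model.serve()` starts a local prediction server. A hedged client sketch follows; the endpoint URL and payload schema are assumptions based on the pattern in Flash's serve example clients, not taken from this source.

import pandas as pd
import requests

# Assumption: flash serve listens on http://127.0.0.1:8000/predict and
# accepts the CSV contents as a string payload.
df = pd.read_csv("data/titanic/titanic.csv")
body = {"session": "UUID", "payload": {"inputs": {"data": df.to_csv()}}}
resp = requests.post("http://127.0.0.1:8000/predict", json=body)
print(resp.json())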