def step_one(pickle_data_url: str, extra: int = 43):
    print('step_one')
    # make sure we have scikit-learn for this step, we need it to unpickle the object
    import sklearn  # noqa
    import pickle
    import pandas as pd
    from clearml import StorageManager

    local_iris_pkl = StorageManager.get_local_copy(remote_url=pickle_data_url)
    with open(local_iris_pkl, 'rb') as f:
        iris = pickle.load(f)
    data_frame = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    # append the labels as a 'target' column
    data_frame['target'] = iris['target']
    return data_frame
def step_one(pickle_data_url):
    # make sure we have scikit-learn for this step, we need it to unpickle the object
    import sklearn  # noqa
    import pickle
    import pandas as pd
    from clearml import StorageManager

    pickle_data_url = \
        pickle_data_url or \
        'https://github.com/allegroai/events/raw/master/odsc20-east/generic/iris_dataset.pkl'
    local_iris_pkl = StorageManager.get_local_copy(remote_url=pickle_data_url)
    with open(local_iris_pkl, 'rb') as f:
        iris = pickle.load(f)
    data_frame = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    # append the labels as a 'target' column
    data_frame['target'] = iris['target']
    return data_frame
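A self-contained function like this is usually registered as a step of a ClearML pipeline. Below is a minimal sketch, assuming the step_one defined above is in scope; the pipeline name, project, and parameter name are placeholders, not part of the original snippets.

from clearml import PipelineController

pipe = PipelineController(name='pipeline demo', project='examples', version='0.0.1')
pipe.add_parameter(
    name='url',
    default='https://github.com/allegroai/events/raw/master/odsc20-east/generic/iris_dataset.pkl',
)
# turn the self-contained function into a pipeline step; its return value is
# stored as an artifact named 'data_frame' for downstream steps to consume
pipe.add_function_step(
    name='step_one',
    function=step_one,
    function_kwargs=dict(pickle_data_url='${pipeline.url}'),
    function_return=['data_frame'],
    cache_executed_step=True,
)
# run the controller and its steps locally for quick verification
pipe.start_locally(run_pipeline_steps_locally=True)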
from clearml import Task, StorageManager

# create a dataset experiment
task = Task.init(project_name="examples", task_name="Pipeline step 1 dataset artifact")

# only create the task, we will actually execute it later
task.execute_remotely()

# simulate a local dataset by downloading one, so we have something local
local_iris_pkl = StorageManager.get_local_copy(
    remote_url='https://github.com/allegroai/events/raw/master/odsc20-east/generic/iris_dataset.pkl'
)

# add and upload the local file containing our toy dataset
task.upload_artifact('dataset', artifact_object=local_iris_pkl)

print('uploading artifacts in the background')

# we are done
print('Done')
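Downstream code can then locate this task and pull the uploaded file back down. A minimal sketch, assuming the project and task names used above:

from clearml import Task

# find the dataset task by project/task name (matching the Task.init call above)
dataset_task = Task.get_task(project_name="examples", task_name="Pipeline step 1 dataset artifact")
# download a local copy of the uploaded 'dataset' artifact
iris_pickle_path = dataset_task.artifacts['dataset'].get_local_copy()
print(iris_pickle_path)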
def main():
    # Connecting ClearML with the current process,
    # from here on everything is logged automatically
    task = Task.init(
        project_name="examples",
        task_name="Model update PyTorch",
        auto_connect_frameworks={"pytorch": False},
    )
    params = {
        "number_of_epochs": 1,
        "batch_size": 64,
        "dropout": 0.25,
        "base_lr": 0.001,
        "momentum": 0.9,
        "loss_report": 100,
    }
    params = task.connect(params)  # enabling configuration override by clearml
    print(params)  # printing actual configuration (after override in remote mode)

    model = OutputModel(task=task, framework="pytorch")
    model_config_dict = {
        "list_of_ints": [1, 2, 3, 4],
        "dict": {"sub_value": "string", "sub_integer": 11},
        "value": 13.37,
    }
    model.update_design(config_dict=model_config_dict)

    manager = StorageManager()
    dataset_path = Path(
        manager.get_local_copy(
            remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
        )
    )

    # Dataset and Dataloader initializations
    transform = transforms.Compose([transforms.ToTensor()])

    trainset = datasets.CIFAR10(root=dataset_path, train=True, download=False, transform=transform)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=params.get("batch_size", 4), shuffle=True, num_workers=10
    )

    testset = datasets.CIFAR10(root=dataset_path, train=False, download=False, transform=transform)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=params.get("batch_size", 4), shuffle=False, num_workers=10
    )

    run(
        params["number_of_epochs"],
        params["base_lr"],
        params["momentum"],
        10,
        params,
        trainloader,
        testloader,
        model,
    )
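The run() call presumably trains the network and reports it through the OutputModel created above. A minimal sketch of that final checkpointing step, with a hypothetical tiny network and file name standing in for the real ones:

import torch
import torch.nn as nn
from clearml import OutputModel, Task

# hypothetical stand-in for the CIFAR10 network trained by run()
net = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10))

task = Task.init(project_name="examples", task_name="Model update PyTorch - weights sketch")
output_model = OutputModel(task=task, framework="pytorch")

# save a checkpoint and register the file as the task's output model
checkpoint_path = "cifar10_model.pt"
torch.save(net.state_dict(), checkpoint_path)
output_model.update_weights(weights_filename=checkpoint_path)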
task.connect(args)
print('Arguments: {}'.format(args))

# only create the task, we will actually execute it later
task.execute_remotely()

# get dataset from the task's artifact
if args['dataset_task_id']:
    dataset_upload_task = Task.get_task(task_id=args['dataset_task_id'])
    print('Input task id={} artifacts {}'.format(
        args['dataset_task_id'], list(dataset_upload_task.artifacts.keys())))
    # download the artifact
    iris_pickle = dataset_upload_task.artifacts['dataset'].get_local_copy()
# get the dataset from a direct url
elif args['dataset_url']:
    iris_pickle = StorageManager.get_local_copy(remote_url=args['dataset_url'])
else:
    raise ValueError("Missing dataset link")

# open the local copy
iris = pickle.load(open(iris_pickle, 'rb'))

# "process" data
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=args['test_size'], random_state=args['random_state'])

# upload processed data
print('Uploading processed dataset')
task.upload_artifact('X_train', X_train)
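The remaining splits would presumably be uploaded in the same way. A later step can fetch such object artifacts back as Python objects; a minimal sketch, with a placeholder task id:

from clearml import Task

# id of the processing task above (placeholder)
processing_task = Task.get_task(task_id='<processing_task_id>')
# artifacts uploaded with upload_artifact() can be deserialized back with get()
X_train = processing_task.artifacts['X_train'].get()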
# Connecting ClearML with the current process,
# from here on everything is logged automatically
task = Task.init(project_name='examples', task_name='Image classification CIFAR10')

params = {
    'number_of_epochs': 20,
    'batch_size': 64,
    'dropout': 0.25,
    'base_lr': 0.001,
    'momentum': 0.9,
    'loss_report': 100
}
params = task.connect(params)  # enabling configuration override by clearml
print(params)  # printing actual configuration (after override in remote mode)

manager = StorageManager()
dataset_path = Path(
    manager.get_local_copy(
        remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"))

# Dataset and Dataloader initializations
transform = transforms.Compose([transforms.ToTensor()])

trainset = datasets.CIFAR10(root=dataset_path, train=True, download=False, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=params.get('batch_size', 4),
from clearml import Task, StorageManager

task = Task.init(
    project_name="mushrooms",
    task_name="mushrooms step 1 dataset artifact",
    task_type=Task.TaskTypes.data_processing,
)
task.execute_remotely()

local_mushrooms_dataset = StorageManager.get_local_copy(
    remote_url="https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/mushrooms.csv"
)
task.upload_artifact("dataset", artifact_object=local_mushrooms_dataset)
print('uploading csv dataset in the background')
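A second step could then read the uploaded CSV straight into a DataFrame. A minimal sketch, assuming the project and task names used above:

import pandas as pd
from clearml import Task

# locate the dataset task created above and pull down its 'dataset' artifact
dataset_task = Task.get_task(project_name="mushrooms", task_name="mushrooms step 1 dataset artifact")
csv_path = dataset_task.artifacts["dataset"].get_local_copy()
mushrooms_df = pd.read_csv(csv_path)
print(mushrooms_df.shape)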
opt.data = args['data']
opt.evolve = args['evolve']
opt.hyp = args['hyp']
# some issue here, force resume to False
opt.resume = False

print(f'args\t{args}')
print(f'opt\t{opt}')

print('Reading in data from clearml')
from clearml import Dataset, StorageManager

print(f"CHECK dataset url\t{args['dataset_url']}")
if args['dataset_url'] == '':
    args['dataset_url'] = 'http://192.168.180.150:30081/pipe_it_up/grayscale.4030799c8a0d493983f287b454a549b3/artifacts/dataset/ds_ece1c9373b924f4ca3719ee53afd4647.zip'
data_dir = StorageManager.get_local_copy(remote_url=args['dataset_url'])
print(f"{args['dataset_url']}\tcheck the path data_dir\n{data_dir}")

clearml_path = {}
clearml_path['train'] = data_dir
clearml_path['val'] = data_dir
clearml_path['freeze_backbone'] = args['freeze_backbone']
print(f'In Main: clearml_path:\t{clearml_path}')

# Set DDP variables
opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
set_logging(opt.global_rank)
if opt.global_rank in [-1, 0]:
    check_git_status()
    check_requirements(exclude=('pycocotools', 'thop'))
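Since Dataset is imported above but only StorageManager is used, the same data could alternatively be fetched through a registered ClearML dataset. A sketch under the assumption that the zip was produced by a clearml-data dataset whose project and name mirror the URL above:

from clearml import Dataset

# hypothetical dataset project/name inferred from the artifact URL
local_dataset_dir = Dataset.get(
    dataset_project="pipe_it_up",
    dataset_name="grayscale",
).get_local_copy()
print(local_dataset_dir)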