Exemplo n.º 1
0
    def __init__(self,
                 by_epoch: bool = True,
                 profile_iters: int = 1,
                 activities: List[str] = ['cpu', 'cuda'],
                 schedule: Optional[dict] = None,
                 on_trace_ready: Optional[Union[Callable, dict]] = None,
                 record_shapes: bool = False,
                 profile_memory: bool = False,
                 with_stack: bool = False,
                 with_flops: bool = False,
                 json_trace_path: Optional[str] = None) -> None:
        """Validate the profiler configuration and store it on the instance.

        Args:
            by_epoch: Must be a ``bool``; stored as ``self.by_epoch``.
            profile_iters: Must be at least 1; stored as
                ``self.profile_iters``.
            activities: List of activity names, each ``'cpu'`` or ``'cuda'``
                (case-insensitive). They are converted to
                ``torch.profiler.ProfilerActivity`` members and stored as
                ``self.activities``. The default list is only read, never
                mutated.
            schedule: Keyword arguments forwarded to
                ``torch.profiler.schedule``. ``None`` stores ``None`` as
                ``self.schedule``.
            on_trace_ready: Stored unchanged as ``self.on_trace_ready``.
            record_shapes: Stored unchanged as ``self.record_shapes``.
            profile_memory: Stored unchanged as ``self.profile_memory``.
            with_stack: Stored unchanged as ``self.with_stack``.
            with_flops: Stored unchanged as ``self.with_flops``.
            json_trace_path: Stored unchanged as ``self.json_trace_path``.

        Raises:
            ImportError: If ``torch.profiler`` cannot be imported.
            AssertionError: If ``by_epoch`` is not a ``bool``.
            ValueError: If ``profile_iters`` is smaller than 1, if
                ``activities`` is not a list, or if any entry is not
                ``'cpu'``/``'cuda'`` (including entries that are not
                strings).
        """
        try:
            from torch import profiler  # torch version >= 1.8.1
        except ImportError as err:
            # Chain the original failure so the real cause stays visible.
            raise ImportError('profiler is the new feature of torch1.8.1, '
                              f'but your version is {torch.__version__}'
                              ) from err

        assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.'
        self.by_epoch = by_epoch

        if profile_iters < 1:
            raise ValueError('profile_iters should be greater than 0, but got '
                             f'{profile_iters}')
        self.profile_iters = profile_iters

        if not isinstance(activities, list):
            raise ValueError(
                f'activities should be list, but got {type(activities)}')
        known_activities = {
            'cpu': profiler.ProfilerActivity.CPU,
            'cuda': profiler.ProfilerActivity.CUDA,
        }
        self.activities = []
        for activity in activities:
            # Only strings can be normalised; anything else is reported with
            # the same ValueError as an unknown name instead of leaking an
            # AttributeError from ``.lower()``.
            name = activity.lower() if isinstance(activity, str) else None
            if name not in known_activities:
                shown = activity if name is None else name
                raise ValueError(
                    f'activity should be "cpu" or "cuda", but got {shown}')
            self.activities.append(known_activities[name])

        if schedule is not None:
            self.schedule = profiler.schedule(**schedule)
        else:
            self.schedule = None

        self.on_trace_ready = on_trace_ready
        self.record_shapes = record_shapes
        self.profile_memory = profile_memory
        self.with_stack = with_stack
        self.with_flops = with_flops
        self.json_trace_path = json_trace_path
Exemplo n.º 2
0
 def __init__(self,
              record_func_name='inference',
              activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
              record_shapes=False,
              profile_memory=True,
              scheduler=schedule(wait=1, warmup=1, active=2),
              trace_handler=tensorboard_trace_handler('./log')):
     """Build the profiler and the named record-function context.

     Args:
         record_func_name: Name passed to ``record_function``.
         activities: Iterable of ``ProfilerActivity`` members to profile.
             A copy is stored on the instance, so neither the shared
             default list nor the caller's list is aliased.
         record_shapes: Forwarded to ``profile``.
         profile_memory: Forwarded to ``profile``.
         scheduler: Forwarded to ``profile`` as ``schedule``.
         trace_handler: Forwarded to ``profile`` as ``on_trace_ready``.

     NOTE: the default ``scheduler`` and ``trace_handler`` objects are
     created once, when this function is defined, and are shared by every
     instance that relies on the defaults.
     """
     # Copy: the default list is built once at definition time, so storing
     # it directly would let a mutation on one instance leak into all others.
     self.activities = list(activities)
     self.profile = profile(activities=self.activities,
                            record_shapes=record_shapes,
                            profile_memory=profile_memory,
                            with_flops=True,
                            schedule=scheduler,
                            on_trace_ready=trace_handler)
     self.record_function = record_function(record_func_name)
Exemplo n.º 3
0
    def train_func():
        """Profile ``num_epochs`` empty steps and report each step's traces.

        A ``TorchWorkerProfiler`` supplies the ``on_trace_ready`` handler.
        After every ``step()`` the traces collected so far are fetched from
        it (and cleared) and passed to ``train.report`` together with the
        epoch index. ``num_epochs`` and ``train`` come from the enclosing
        scope.
        """
        from ray.train.torch import TorchWorkerProfiler
        from torch.profiler import profile, record_function, schedule

        worker_profiler = TorchWorkerProfiler()
        # No wait and no warmup: every step is an active one.
        every_step = schedule(wait=0, warmup=0, active=1)
        with profile(
                activities=[],
                schedule=every_step,
                on_trace_ready=worker_profiler.trace_handler,
        ) as prof:
            for epoch in range(num_epochs):
                with record_function("test_function"):
                    pass

                prof.step()

                traces = worker_profiler.get_and_clear_profile_traces()
                train.report(epoch=epoch, **traces)
Exemplo n.º 4
0
def train_func():
    """Train a one-feature linear model for five epochs under the profiler.

    Each epoch runs three named ``record_function`` regions: the training
    pass ("train_epoch"), saving a checkpoint ("train_checkpoint") and
    reporting the collected profiler traces ("train_report"). The profiler
    is stepped once per epoch, between the checkpoint and the report.
    """
    worker_profiler = TorchWorkerProfiler()
    with profile(
            activities=[],
            schedule=schedule(wait=0, warmup=0, active=1),
            on_trace_ready=worker_profiler.trace_handler,
    ) as prof:

        # Model, loss and optimizer.
        net = train.torch.prepare_model(torch.nn.Linear(1, 1))
        criterion = torch.nn.MSELoss()
        sgd = torch.optim.SGD(net.parameters(), lr=1e-2)

        # Synthetic data: the target is twice the feature.
        features = torch.randn(1000, 1)
        targets = features * 2
        loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(features, targets),
            batch_size=32)
        loader = train.torch.prepare_data_loader(loader)

        # Training loop.
        for epoch in range(5):
            with record_function("train_epoch"):
                for batch_x, batch_y in loader:
                    output = net(batch_x)
                    batch_loss = criterion(output, batch_y)
                    sgd.zero_grad()
                    batch_loss.backward()
                    sgd.step()

            with record_function("train_checkpoint"):
                weights = net.state_dict()
                # Removes the "module." prefix from the state-dict keys, if present.
                consume_prefix_in_state_dict_if_present(weights, "module.")
                train.save_checkpoint(epoch=epoch, model_weights=weights)

            prof.step()

            with record_function("train_report"):
                traces = worker_profiler.get_and_clear_profile_traces()
                train.report(epoch=epoch, **traces)
Exemplo n.º 5
0
#p.outShape = (1, 64, 1088, 1920)

# Benchmark fragment: runs p on a random tensor and prints a memory figure m,
# both raw and divided by the input size (load).

# Returns the 'reserved_bytes.all.peak' entry of torch.cuda.memory_stats for device i.
getMemUsed = lambda i: torch.cuda.memory_stats(i)['reserved_bytes.all.peak']
# Random input tensor with the configured dtype, created on the configured device.
t = torch.randn(shape, dtype=config.dtype(), device=config.device()) # pylint: disable=E1101
# Product of the last two dimensions and the first dimension of shape;
# used on the final line to normalize m.
load = shape[-1] * shape[-2] * shape[0]
# Reading taken before p is run; None when config.cuda is falsy.
m = getMemUsed(config.device()) if config.cuda else None
print(config.dtype(), config.device(), m)
if config.cuda:
  # Untimed first call of p -- presumably a warm-up run; TODO confirm.
  p(t)
  #doCrop(p, t)
  # NOTE(review): the value returned by this call is discarded.
  getMemUsed(config.device())
  # Time a second call of p with perf_counter.
  # NOTE(review): no explicit device synchronization is visible before the
  # timer is read -- confirm that p(t) blocks until its work has finished.
  start = perf_counter()
  p(t)
  #doCrop(p, t).mean().cpu()
  # NOTE(review): 'elpased' looks like a typo for 'elapsed'; the runtime
  # string is left unchanged here.
  print('time elpased: {}'.format(perf_counter() - start))
  m = getMemUsed(config.device())
else:
  # Non-CUDA path: profile CPU activity with memory profiling enabled, using
  # a schedule of one wait step, one warmup step and one active step.
  schedule1 = schedule(
    wait=1,
    warmup=1,
    active=1)
  with profile(
    activities=[ProfilerActivity.CPU],
    schedule=schedule1, profile_memory=True) as pro:
    # Three calls of p, one profiler step after each.
    for _ in range(3):
      p(t)
      pro.step()
    avg = pro.key_averages()
    # Sort in place, largest cpu_memory_usage first, and take that entry's value.
    avg.sort(key=lambda o: o.cpu_memory_usage, reverse=True)
    m = avg[0].cpu_memory_usage
# Prints m, m divided by load, and load.
print(m, m / load, load)
Exemplo n.º 6
0
# (such as training loops). Tracing all of the execution can be
# slow and result in very large trace files. To avoid this, use optional
# arguments:
#
# - ``schedule`` - specifies a function that takes an integer argument (step number)
#   as an input and returns an action for the profiler. The best way to use this parameter
#   is to use the ``torch.profiler.schedule`` helper function, which can generate a schedule for you;
# - ``on_trace_ready`` - specifies a function that takes a reference to the profiler as
#   an input and is called by the profiler each time a new trace is ready.
#
# To illustrate how the API works, let's first consider the following example with
# the ``torch.profiler.schedule`` helper function:

from torch.profiler import schedule

# Example schedule; each keyword is explained in the text that follows.
my_schedule = schedule(
    skip_first=10,
    wait=5,
    warmup=1,
    active=3,
    repeat=2,
)

######################################################################
# Profiler assumes that the long-running job is composed of steps, numbered
# starting from zero. The example above defines the following sequence of actions
# for the profiler:
#
# 1. Parameter ``skip_first`` tells profiler that it should ignore the first 10 steps
#    (default value of ``skip_first`` is zero);
# 2. After the first ``skip_first`` steps, profiler starts executing profiler cycles;
# 3. Each cycle consists of three phases:
#
#    - idling (``wait=5`` steps), during this phase profiler is not active;
#    - warming up (``warmup=1`` steps), during this phase profiler starts tracing, but
#      the results are discarded; this phase is used to discard the samples obtained by
#      the profiler at the beginning of the trace since they are usually skewed by an extra