# Source code for habitat.profiling.operation

import torch
import logging

from habitat.analysis.metrics import resolve_metrics
from habitat.analysis.run_time import RunTimeMeasurement
from habitat.profiling.autograd import AutogradEngine
from habitat.profiling.backward import BackwardHelper, backward_available
from habitat.profiling.kernel import KernelProfiler

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class OperationProfiler:
    """Measures the forward and backward run time of a single operation.

    Timing is done with a pair of CUDA events around repeated invocations of
    the operation. Kernel-level details are collected through a
    ``KernelProfiler`` when requested.
    """

    def __init__(
        self,
        device,
        metrics=None,
        metrics_threshold_ms=0,
        warm_up=3,
        measure_for=10
    ):
        """Set up the profiler.

        Args:
            device: Passed to ``resolve_metrics`` and ``KernelProfiler`` and
                attached to every measurement produced by this profiler.
            metrics: Passed to ``resolve_metrics`` together with ``device``.
            metrics_threshold_ms: Forwarded unchanged to ``KernelProfiler``.
            warm_up: Number of untimed invocations made before each timing.
            measure_for: Number of timed invocations; the reported run time is
                the elapsed time divided by this value.
        """
        self._device = device
        self._metrics = resolve_metrics(metrics, device)
        self._kernel_profiler = KernelProfiler(
            device,
            self._metrics,
            metrics_threshold_ms=metrics_threshold_ms,
        )
        self._warm_up = warm_up
        self._measure_for = measure_for
        self._start_event = torch.cuda.Event(enable_timing=True)
        self._end_event = torch.cuda.Event(enable_timing=True)

    def measure_operation(self, func, args, kwargs, record_kernels=True):
        """Measure ``func(*args, **kwargs)`` in the forward and backward pass.

        Args:
            func: The callable to profile.
            args: Positional arguments for ``func``.
            kwargs: Keyword arguments for ``func``.
            record_kernels: Whether kernel measurements should be collected.

        Returns:
            A ``(forward, backward)`` pair of measurements. ``backward`` is
            ``None`` when ``backward_available`` reports that no backward pass
            exists for the value returned by ``func``.
        """
        func_name = getattr(func, '__name__', '')
        for_inplace = _is_potentially_inplace(func_name)

        forward_args, forward_kwargs = self._get_args_for_profiling(
            args, kwargs, for_inplace)

        def forward_runnable():
            func(*forward_args, **forward_kwargs)

        forward = self._to_run_time_measurement(
            func_name,
            forward_runnable,
            record_kernels,
        )

        # We need separate copies of the arguments for the forward and backward
        # measurements because func might be inplace. Running an inplace
        # function repeatedly will affect the autograd graph, which causes
        # problems when we try to measure the backward pass.
        backward_args, backward_kwargs = self._get_args_for_profiling(
            args, kwargs, for_inplace)
        retval = func(*backward_args, **backward_kwargs)
        if not backward_available(retval):
            return forward, None

        return forward, self._measure_backward(
            func_name,
            retval,
            record_kernels,
        )

    def _get_args_for_profiling(self, args, kwargs, for_inplace=False):
        """Return fresh copies of ``args`` and ``kwargs`` for one measurement.

        Every positional and keyword value is passed through
        ``_clone_tensors``.

        Returns:
            A ``(tuple, dict)`` pair mirroring ``args`` and ``kwargs``.
        """
        cloned_args = tuple(
            self._clone_tensors(arg, for_inplace) for arg in args
        )
        cloned_kwargs = {
            key: self._clone_tensors(value, for_inplace)
            for key, value in kwargs.items()
        }
        return cloned_args, cloned_kwargs

    def _clone_tensors(self, argument, for_inplace):
        """Detach (and optionally clone) any tensors found in ``argument``.

        Tensors are detached and given the same ``requires_grad`` flag as the
        input. Tuples and lists are processed recursively and rebuilt as a
        plain ``tuple`` / ``list``. Any other value is returned unchanged.
        """
        if isinstance(argument, torch.Tensor):
            detached = argument.detach()
            detached.requires_grad_(argument.requires_grad)
            # We need to clone the tensor for inplace operations because they
            # cannot be executed on a leaf tensor. This adds some overhead to
            # our backward measurements (an extra CloneBackward function), but
            # it _should_ be negligible. I chose not to exclude CloneBackward
            # from the backward measurements to avoid introducing incorrectness
            # if the user actually uses clone() in their own code.
            return detached if not for_inplace else detached.clone()

        if isinstance(argument, tuple):
            return tuple(
                self._clone_tensors(arg, for_inplace) for arg in argument
            )

        if isinstance(argument, list):
            return [
                self._clone_tensors(arg, for_inplace) for arg in argument
            ]

        return argument

    def _measure_backward(self, func_name, operation_outputs, record_kernels):
        """Measure the backward pass, preferring the custom autograd engine.

        Falls back to the PyTorch-engine strategy when the custom engine
        raises ``RuntimeError`` or ``TypeError``.
        """
        # As of PyTorch 1.5.1, sometimes our AutogradEngine will not work
        # because the behavior of grad_functions has changed. When this
        # happens, we fall back to using PyTorch's existing backward engine.
        #
        # The reason we do not always use the existing backward engine is that
        # the existing engine has a start up overhead that is non-negligible
        # when there are many short operations involved.
        try:
            return self._measure_backward_engine_strategy(
                func_name,
                operation_outputs,
                record_kernels,
            )
        except (RuntimeError, TypeError):
            logger.debug("%s: Falling back to PyTorch's engine", func_name)
            return self._measure_backward_torch_strategy(
                func_name,
                operation_outputs,
                record_kernels,
            )

    def _measure_backward_engine_strategy(
        self,
        func_name,
        operation_outputs,
        record_kernels,
    ):
        """Measure the backward pass by running it through ``AutogradEngine``."""
        engine = AutogradEngine.new_from(operation_outputs)
        return self._to_run_time_measurement(
            func_name,
            engine.run_backward,
            record_kernels,
        )

    def _measure_backward_torch_strategy(
        self,
        func_name,
        operation_outputs,
        record_kernels,
    ):
        """Measure the backward pass through ``BackwardHelper``.

        The reported run time is the backward time minus the time measured for
        ``run_accumulate_grad``. If that difference is below ``1e-6`` ms, the
        uncorrected backward time is reported instead.
        """
        helper = BackwardHelper.new_from(operation_outputs)
        backward_ms = self._measure_ms(helper.run_backward)
        accum_grad_ms = self._measure_ms(helper.run_accumulate_grad)
        diff = backward_ms - accum_grad_ms
        return self._to_run_time_measurement(
            func_name,
            helper.run_backward,
            record_kernels,
            run_time_ms=(diff if diff >= 1e-6 else backward_ms),
        )

    def _to_run_time_measurement(
        self,
        func_name,
        runnable,
        record_kernels,
        run_time_ms=None,
    ):
        """Build a ``RunTimeMeasurement`` for ``runnable``.

        Args:
            func_name: Name passed to the kernel profiler.
            runnable: Zero-argument callable to time and/or profile.
            record_kernels: When false, the measurement gets an empty kernel
                list and the kernel profiler is not invoked.
            run_time_ms: Pre-computed run time; when ``None`` the run time is
                measured here with ``_measure_ms``.
        """
        run_time = (
            self._measure_ms(runnable)
            if run_time_ms is None
            else run_time_ms
        )
        return RunTimeMeasurement(
            run_time_ms=run_time,
            kernels=(
                self._kernel_profiler.measure_kernels(runnable, func_name)
                if record_kernels
                else []
            ),
            device=self._device,
        )

    def _measure_ms(self, runnable):
        """Return the average time of one ``runnable()`` call, in milliseconds.

        ``runnable`` is invoked ``warm_up`` times untimed, then
        ``measure_for`` times between the start and end CUDA events.
        """
        for _ in range(self._warm_up):
            runnable()

        self._start_event.record()
        for _ in range(self._measure_for):
            runnable()
        self._end_event.record()
        # Wait for the recorded work to finish before reading the events.
        torch.cuda.synchronize()

        return (
            self._start_event.elapsed_time(self._end_event) / self._measure_for
        )


# Populated manually from:
#
# Names of functions that may operate in place; membership is checked by
# `_is_potentially_inplace`.
POTENTIALLY_INPLACE_FUNCTIONS = {
    'threshold',
    'relu',
    'hardtanh',
    'relu6',
    'elu',
    'selu',
    'celu',
    'leaky_relu',
    'rrelu',
    'dropout',
    'alpha_dropout',
    'dropout2d',
    'dropout3d',

    # In place math operations (+=, *=, -=, /=, //=)
    '__iadd__',
    '__imul__',
    '__isub__',
    '__itruediv__',
    '__ifloordiv__',
}
def _is_potentially_inplace(fn_name):
    """Return whether a function called ``fn_name`` may operate in place.

    A name qualifies when it is listed in ``POTENTIALLY_INPLACE_FUNCTIONS`` or
    when it ends in exactly one underscore (e.g. ``add_``). Names ending in
    two underscores, such as ``__add__``, do not match the suffix rule, and
    neither does a one-character name.
    """
    return (
        fn_name in POTENTIALLY_INPLACE_FUNCTIONS or
        # In PyTorch, functions with a '_' suffix are in place, by convention
        (len(fn_name) > 1 and fn_name[-1] == '_' and fn_name[-2] != '_')
    )