Source code for aihwkit.simulator.parameters.inference

# -*- coding: utf-8 -*-

# (C) Copyright 2020, 2021, 2022, 2023, 2024 IBM. All Rights Reserved.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

# pylint: disable=too-many-instance-attributes
# pylint: disable=too-many-lines

"""Inference related parameters for resistive processing units."""

from dataclasses import dataclass, field
from typing import ClassVar, Type, List, Optional, Union

from aihwkit.simulator.parameters.helpers import _PrintableMixin
from aihwkit.simulator.rpu_base import tiles
from aihwkit.simulator.parameters.enums import WeightModifierType, WeightClipType, WeightRemapType


@dataclass
class WeightModifierParameter(_PrintableMixin):
    """Settings for modifying the forward/backward weights during hardware-aware training."""

    bindings_class: ClassVar[Optional[Union[str, Type]]] = "WeightModifierParameter"
    bindings_module: ClassVar[str] = "tiles"

    std_dev: float = 0.0
    """Standard deviation of the noise added to the weight matrix.

    Affects the modifier types ``AddNormal``, ``MultNormal`` and
    ``DiscretizeAddNormal``.

    Note:
        If ``rel_to_actual_wmax`` is set, ``std_dev`` is expressed
        relative to the abs max of the given weight matrix; otherwise
        it is relative to the assumed max, which is set by
        ``assumed_wmax``.
    """

    per_batch_sample: bool = False
    """Whether to resample the noise for each sample in the batch.

    This parameter is only used with the ``TorchSimulatorTile``. In
    case of ``RPUCudaTile`` it will throw an error.
    """

    res: float = 0.0
    r"""Resolution of the discretization.

    The inverse of ``res`` gives the number of equal sized steps in
    :math:`-a_\text{max}\ldots,a_\text{max}`, where
    :math:`a_\text{max}` is either given by the abs max (if
    ``rel_to_actual_wmax`` is set) or by ``assumed_wmax`` otherwise.

    ``res`` is only used in the modifier types ``DoReFa``,
    ``Discretize``, and ``DiscretizeAddNormal``.
    """

    sto_round: bool = False
    """Whether stochastic rounding is enabled for the discretization.

    ``sto_round`` is only used in the modifier types ``DoReFa``,
    ``Discretize``, and ``DiscretizeAddNormal``.
    """

    dorefa_clip: float = 0.6
    """Parameter for DoReFa."""

    pdrop: float = 0.0
    """Drop connect probability.

    Drop connect sets weights to zero with the given probability.

    Important:
        Drop connect can be used in combination with any other
        modifier type.
    """

    enable_during_test: bool = False
    """Whether to use the last modified weight matrix during testing.

    Caution:
        This will **not** remove drop connect or any other noise
        during evaluation, and thus should only be used with care.
    """

    rel_to_actual_wmax: bool = True
    """Whether to compute the abs max of the weight and apply noise relative to it.

    If set to False, ``assumed_wmax`` is taken as relative units.
    """

    assumed_wmax: float = 1.0
    """Assumed weight value that is mapped to the maximal conductance.

    This is typically 1.0. This parameter will be ignored if
    ``rel_to_actual_wmax`` is set.
    """

    copy_last_column: bool = False
    """Whether to not apply noise to the last column (which usually
    contains the bias values)."""

    coeffs: List[float] = field(
        default_factory=lambda: [0.0105392, 0.0768, -0.046925],
        metadata={"hide_if": [0.0105392, 0.0768, -0.046925]},
    )
    """Coefficients for the ``POLY`` weight modifier type.

    See :class:`WeightModifierType` for details.
    """

    type: WeightModifierType = field(
        default_factory=lambda: WeightModifierType.NONE,
        metadata={"always_show": True},
    )
    """Type of the weight modification."""

    g_max: float = 25.0
    r"""PCM_NOISE and PROG_NOISE parameter, :math:`g_\text{max}` setting in :math:`\mu S`."""

    pcm_zero_thres: float = 0.0
    """PCM_NOISE parameter."""

    pcm_t_inference: float = 0.0
    """PCM_NOISE parameter, time of inference."""

    pcm_prob_at_reset: float = 0.0
    """PCM_NOISE parameter, probability of reset."""

    pcm_prob_at_gmax: float = 0.0
    r"""PCM_NOISE parameter, probability of devices being at :math:`g_\text{max}`."""

    pcm_prob_at_random: float = 0.0
    """PCM_NOISE parameter, probability of devices being at a random value in the range."""

    pcm_t0: float = 20.0
    """PCM_NOISE parameter, programming conversion time in seconds."""
@dataclass
class WeightClipParameter(_PrintableMixin):
    """Settings for clipping the weights during hardware-aware training.

    Important:
        A clipping ``type`` has to be set before any of the parameter
        changes take any effect.
    """

    bindings_class: ClassVar[Optional[Union[str, Type]]] = tiles.WeightClipParameter

    fixed_value: float = -1.0
    """Clipping value in case of the ``FixedValue`` type.

    Caution:
        If ``fixed_value > 0`` it will also be applied during other
        clipping types.
    """

    sigma: float = 2.5
    """Sigma value used for clipping with the ``LayerGaussian`` type."""

    type: WeightClipType = field(
        default_factory=lambda: WeightClipType.NONE,
        metadata={"always_show": True},
    )
    """Type of clipping."""
@dataclass
class WeightRemapParameter(_PrintableMixin):
    """Settings for remapping the weights during hardware-aware training.

    Important:
        A remap ``type`` has to be set before any of the parameter
        changes take any effect.
    """

    bindings_class: ClassVar[Optional[Union[str, Type]]] = tiles.WeightRemapParameter

    remapped_wmax: float = 1.0
    """Assumed max of the weight, i.e. the weight value that the
    maximal conductance is mapped to. Typically 1.0.
    """

    max_scale_range: float = 0.0
    """Maximal range of scale values.

    Use zero to turn any restrictions off (default).
    """

    max_scale_ref: float = 0.0
    """Reference scale that is used as the minimal scale when
    determining the scale range."""

    type: WeightRemapType = field(
        default_factory=lambda: WeightRemapType.NONE,
        metadata={"always_show": True},
    )
    """Type of remapping."""
@dataclass
class SimpleDriftParameter(_PrintableMixin):
    r"""Settings for a simple power law drift.

    The drift is a simple power law drift without device-to-device
    variation or conductance dependence. It computes:

    .. math::

        w_{ij}*\left(\frac{t + \Delta t}{t_0}\right)^{-\nu}
    """

    bindings_class: ClassVar[Optional[Union[str, Type]]] = "DriftParameter"
    bindings_module: ClassVar[str] = "devices"

    nu: float = 0.0
    r"""Average drift :math:`\nu` value.

    Needs to be non-zero to actually use the drift.
    """

    t_0: float = 1.0
    """Time between write and first read.

    Usually assumed in milliseconds, however, it really determines
    the time units of ``time_since_last_call`` when calling the drift.
    """

    reset_tol: float = 1e-7
    """Reset tolerance.

    This should be a number smaller than the expected weight change,
    as it is used to detect any changes in the weight from the last
    drift call. Every change to the weight above this tolerance will
    reset the drift time.

    Caution:
        Any write noise or diffusion on the weight might thus
        interfere with the drift.
    """
@dataclass
class DriftParameter(SimpleDriftParameter):
    r"""Settings for a power law drift.

    The drift is based on the model described by `Oh et al (2019)`_.

    It computes:

    .. math::

        w_{ij}*\left(\frac{t + \Delta t}{t_0}\right)^{-\nu^\text{actual}_{ij}}

    where the drift coefficient is drawn once at the beginning and
    might depend on the device. It can also depend on the actual
    weight value.

    The actual drift coefficient is computed as:

    .. math::

        \nu_{ij}^\text{actual} = \nu_{ij} - \nu_k \log \frac{(w_{ij} - w_\text{off}) / r_\text{wg}
        + g_\text{off}}{G_0} + \nu\sigma_\nu\xi

    Here :math:`w_{ij}` is the actual weight and :math:`\nu_{ij}` is
    fixed for each device, given by the mean :math:`\nu` and the
    device-to-device variation: :math:`\nu_{ij} = \nu +
    \nu_\text{dtod}\nu\xi`. These are only drawn once at the beginning
    (tile instantiation). :math:`\xi` is Gaussian noise.

    Note:
        If the weight has changed from the last drift call
        (determined by the ``reset_tol`` parameter), for instance due
        to update, decay or noise, then the drift time :math:`t` will
        be reset and start anew; however, the drift coefficients
        :math:`\nu_{ij}` are *not* changed. On the other hand, if the
        weights have not changed since the last call, :math:`t` will
        accumulate the time.

    Caution:
        Note that the drift coefficient does *not* depend on the
        initially programmed weight value at :math:`t=0` in the
        current implementation (i.e. G0 is a constant for all
        devices), but instead on the actual weight. In some materials
        (e.g. phase changed materials), that might not be accurate.

    .. _`Oh et al (2019)`: https://ieeexplore.ieee.org/document/8753712
    """

    nu_dtod: float = 0.0
    r"""Device-to-device variation of the :math:`\nu` values."""

    nu_std: float = 0.0
    r"""Cycle-to-cycle variation of :math:`\nu`.

    A more realistic way to add noise of the drift might be using
    ``w_noise_std``.
    """

    wg_ratio: float = 1.0
    """``(w_max-w_min)/(g_max-g_min)`` to convert to physical units."""

    g_offset: float = 0.0
    """``g_min`` to convert to physical units."""

    w_offset: float = 0.0
    """``w(g_min)``, i.e. the value that ``g_min`` is mapped to in w-space."""

    nu_k: float = 0.0
    r"""Variation of :math:`\nu` with :math:`W`.

    That is :math:`\nu(R) = \nu_0 - k \log(G/G_0)`. See Oh et al. for
    details.
    """

    log_g0: float = 0.0
    """Log g0."""

    w_noise_std: float = 0.0
    """Additional weight noise (Gaussian diffusion) added to the
    weights after the drift is applied."""