# Source code for aihwkit.inference.noise.custom

# -*- coding: utf-8 -*-

# (C) Copyright 2020, 2021, 2022, 2023, 2024 IBM. All Rights Reserved.
#
# Licensed under the MIT license. See LICENSE file in the project root for details.

"""Phenomenological noise model for inference."""

from copy import deepcopy
from typing import List, Optional

from numpy import log as numpy_log
from numpy import sqrt
from torch import abs as torch_abs
from torch import randn_like, Tensor
from torch.autograd import no_grad

from aihwkit.inference.noise.base import BaseNoiseModel
from aihwkit.inference.converter.base import BaseConductanceConverter
from aihwkit.inference.converter.conductance import SinglePairConductanceConverter


class StateIndependentNoiseModel(BaseNoiseModel):  # pylint: disable=too-many-instance-attributes
    r"""Standard noise model that has a non-conductance dependent drift and
    multiplicative read (1/f) noise.

    Programming noise is state-independent by default, however, it can
    be made conductance dependent, since the expected programming
    noise strength is modeled with a polynomial (second-order by
    default) in general.

    **Programming noise** is thus given by:

    .. math::

        \sigma_\text{programming noise} = \gamma\,\left(c_0
            + c_1 \frac{g_T}{g_\text{max}}
            + c_2 \frac{g_T^2}{g_\text{max}^2}\right)

    where :math:`\gamma` is an additional convenience scale and :math:`g_T`
    is the target conductance established from the given
    ``g_converter`` from the weight matrix.  The default programming
    noise is constant (state independent): :math:`c_0=0.2\mu\mathrm{S}`
    and other coefficients set to :math:`0.0`.

    **Drift** for each device is computed as

    .. math::

        g_\text{drift}(t) = g_\text{prog}(t / t_0) ^{- \nu}

    with the drift coefficient determined at the beginning for each
    device with

    .. math::

        \nu = \zeta\, |\nu_\text{mean} + \nu_\text{std}\xi|

    where :math:`\xi` is a Gaussian random number, :math:`|\cdot|` is
    the absolute value and :math:`\zeta` is an additional drift scale.

    Note:
        The implementation takes the absolute value of the drawn
        coefficient, i.e. negative draws are mirrored to positive
        values; they are not rectified to zero.

    **Read noise** is given by

    .. math::

        \sigma_\text{read} = \rho \frac{g_\text{drift}(t)}{g_\text{max}}
        \sqrt{\log\left(\frac{t + t_\text{read}}{2 t_\text{read}}\right)}

    This :math:`\sigma_\text{read}` is then used to add Gaussian noise
    of this magnitude to the drifted conductance. The read noise scale
    :math:`\rho` can be used to scale the read noise. For
    :math:`t \le t_\text{read}` the logarithm would be non-positive, so
    no read noise is added in that regime.

    Args:
        g_converter: instantiated class of the conductance converter
            (defaults to single pair). A deep copy of the given
            instance is used.

        g_max: In :math:`\mu S`, the maximal conductance, ie the value
            the absolute max of the weights will be mapped to. If the
            converter exposes a ``g_max`` attribute, that value takes
            precedence.

        prog_coeff: programming polynomial coefficients :math:`c_i` in
            :math:`\mu S`. Default is constant :math:`c_0=0.2` and
            other coefficients set to 0.0.

        prog_noise_scale: scale :math:`\gamma` for the programming noise

        drift_nu_mean: mean :math:`\nu_\text{mean}` of power-law drift
            coefficient (:math:`\nu`) (before ``drift_scale``
            :math:`\zeta` is applied).

        drift_nu_std: device-to-device variability
            :math:`\nu_\text{std}` of the power-law drift coefficient
            (before ``drift_scale`` is applied)

        drift_scale: additional scale :math:`\zeta` applied to all
            drawn drift coefficients

        t_0: parameter of the drift (first reading time), see above.

            Note:
                The ``t_inference`` is relative to this time ``t_0``
                e.g. ``t_inference`` counts from the completion of the
                programming of a device.

        read_noise_scale: scale :math:`\rho` for scaling the read and
            accumulated noise :math:`1/f`.

        t_read: parameter of the :math:`1/f` noise (in seconds)

    Raises:
        ValueError: if no ``g_max`` can be established, neither from
            the converter nor from the ``g_max`` argument.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        g_converter: Optional[BaseConductanceConverter] = None,
        g_max: Optional[float] = None,
        prog_coeff: Optional[List[float]] = None,
        prog_noise_scale: float = 1.0,
        drift_nu_mean: float = 0.1,
        drift_nu_std: float = 0.05,
        drift_scale: float = 1.0,
        t_0: float = 20.0,
        read_noise_scale: float = 1.0,
        t_read: float = 250.0e-9,
    ):
        # Decide on the default by identity, not truthiness: a valid
        # converter that happens to evaluate falsy (e.g. defines
        # ``__len__`` / ``__bool__``) must not be silently replaced.
        if g_converter is None:
            g_converter = SinglePairConductanceConverter(g_max=g_max)
        else:
            # Use an independent copy of the caller's converter.
            g_converter = deepcopy(g_converter)
        super().__init__(g_converter)

        # Prefer the converter's own ``g_max`` when it exposes one; fall
        # back to the explicit argument otherwise.
        # NOTE(review): ``self.g_converter`` is presumably assigned by
        # ``BaseNoiseModel.__init__`` -- confirm against the base class.
        self.g_max = getattr(self.g_converter, "g_max", g_max)

        if self.g_max is None:
            raise ValueError("g_max cannot be established from g_converter")

        self.prog_coeff = [0.2, 0.0, 0.0] if prog_coeff is None else prog_coeff
        self.prog_noise_scale = prog_noise_scale
        self.drift_nu_mean = drift_nu_mean
        self.drift_nu_std = drift_nu_std
        self.drift_scale = drift_scale
        self.t_0 = t_0
        self.read_noise_scale = read_noise_scale
        self.t_read = t_read

    @no_grad()
    def apply_programming_noise_to_conductance(self, g_target: Tensor) -> Tensor:
        """Apply programming noise to a target conductance Tensor.

        Adds Gaussian noise whose standard deviation depends on the
        target conductance through the polynomial given by
        ``prog_coeff`` (evaluated in ``g_target / g_max``) and is scaled
        by ``prog_noise_scale``.

        Args:
            g_target: target conductances.

        Returns:
            The noisy conductances, clamped to be non-negative.
        """
        # The relative conductance is loop-invariant, compute it once.
        g_relative = g_target / self.g_max

        # sig_prog = c_0 + c_1 * g_rel + c_2 * g_rel^2 + ...
        sig_prog = self.prog_coeff[0]
        power = 1.0
        for coeff in self.prog_coeff[1:]:
            power = power * g_relative
            sig_prog = sig_prog + power * coeff

        g_prog = g_target + self.prog_noise_scale * sig_prog * randn_like(g_target)
        g_prog.clamp_(min=0.0)  # no negative conductances allowed
        return g_prog

    @no_grad()
    def generate_drift_coefficients(self, g_target: Tensor) -> Tensor:
        """Return drift coefficients ``nu``.

        One coefficient is drawn per element of ``g_target`` from a
        Gaussian with mean ``drift_nu_mean`` and standard deviation
        ``drift_nu_std``; the absolute value of the draw is then
        multiplied by ``drift_scale``.

        Args:
            g_target: target conductances; only used as the template
                for the random draw (``randn_like``), its values do not
                enter the result.

        Returns:
            Tensor of drift coefficients shaped like ``g_target``.
        """
        # NOTE(review): the absolute value mirrors negative draws to
        # positive ones. The original code additionally called
        # ``.clamp(min=0.0)`` on the result, which is a no-op after
        # ``abs``; if rectification to zero was intended instead, the
        # ``abs`` would have to be replaced by the clamp -- confirm.
        nu_drift = torch_abs(self.drift_nu_mean + self.drift_nu_std * randn_like(g_target))
        return nu_drift * self.drift_scale

    @no_grad()
    def apply_drift_noise_to_conductance(
        self, g_prog: Tensor, drift_noise_param: Tensor, t_inference: float
    ) -> Tensor:
        """Apply the noise and drift up to the assumed inference time
        point.

        Args:
            g_prog: programmed conductances.
            drift_noise_param: drift coefficients ``nu`` used as the
                (negated) exponent of the power-law drift.
            t_inference: inference time, counted relative to ``t_0``.

        Returns:
            The drifted conductances with read noise added, clamped to
            be non-negative.
        """
        t = t_inference + self.t_0

        # drift: only applied once time has advanced beyond t_0
        if t > self.t_0:
            g_drift = g_prog * ((t / self.t_0) ** (-drift_noise_param))
        else:
            g_drift = g_prog

        if t <= 0:
            # no time has elapsed: neither drift nor read noise applies
            return g_prog.clamp(min=0.0)

        # expected accumulated 1/f noise since start of programming at t=0
        #
        # For t < t_read the log argument drops below 1, the log turns
        # negative and the square root would be NaN, poisoning every
        # conductance. Floor the argument at 1 (i.e. zero read noise).
        log_arg = (t + self.t_read) / (2 * self.t_read)
        sig_noise = sqrt(numpy_log(max(log_arg, 1.0)))
        g_final = g_drift + torch_abs(
            g_drift / self.g_max
        ) * self.read_noise_scale * sig_noise * randn_like(g_drift)

        return g_final.clamp(min=0.0)