Source code for aihwkit.inference.noise.custom

# -*- coding: utf-8 -*-

# (C) Copyright 2020, 2021, 2022, 2023, 2024 IBM. All Rights Reserved.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Phenomenological noise model for inference."""

from copy import deepcopy
from typing import List, Optional

from numpy import log as numpy_log
from numpy import sqrt
from torch import abs as torch_abs
from torch import randn_like, Tensor
from torch.autograd import no_grad

from aihwkit.inference.noise.base import BaseNoiseModel
from aihwkit.inference.converter.base import BaseConductanceConverter
from aihwkit.inference.converter.conductance import SinglePairConductanceConverter


class StateIndependentNoiseModel(BaseNoiseModel):
    # pylint: disable=too-many-instance-attributes
    r"""Standard noise model that has a non-conductance-dependent drift and
    multiplicative read (:math:`1/f`) noise.

    Programming noise is state-independent by default; however, it can be
    made conductance-dependent, since the expected programming noise
    strength is in general modeled with a second-order polynomial.

    **Programming noise** is thus given by:

    .. math::

        \sigma_\text{programming noise} = \gamma \left(c_0
            + c_1 \frac{g_T}{g_\text{max}}
            + c_2 \frac{g_T^2}{g_\text{max}^2}\right)

    where :math:`\gamma` is an additional convenience scale and :math:`g_T`
    is the target conductance established with the given ``g_converter``
    from the weight matrix.

    The default programming noise is constant (state-independent):
    :math:`c_0 = 0.2\,\mu\mathrm{S}` with the other coefficients set to
    :math:`0.0`.

    **Drift** is computed for each device as

    .. math::

        g_\text{drift}(t) = g_\text{prog} \left(t / t_0\right)^{-\nu}

    with the drift coefficient determined once for each device by

    .. math::

        \nu = \zeta \, |\nu_\text{mean} + \nu_\text{std}\,\xi|_+

    where :math:`\xi` is a Gaussian random number and :math:`|\cdot|_+`
    rectifies negative values to zero. :math:`\zeta` is an additional drift
    scale.

    **Read noise** is given by

    .. math::

        \sigma_\text{read} = \rho\,\frac{g_\text{drift}(t)}{g_\text{max}}
        \sqrt{\log\left(\frac{t + t_\text{read}}{2\,t_\text{read}}\right)}

    This :math:`\sigma_\text{read}` is then used to add Gaussian noise of
    this magnitude to the drifted conductance. The read noise scale
    :math:`\rho` can be used to scale the read noise.

    Args:
        g_converter: instantiated class of the conductance converter
            (defaults to single pair).
        g_max: In :math:`\mu S`, the maximal conductance, i.e. the value
            that the absolute max of the weights will be mapped to.
        prog_coeff: programming polynomial coefficients :math:`c_i` in
            :math:`\mu S`. Default is constant :math:`c_0 = 0.2` with the
            other coefficients set to 0.0.
        prog_noise_scale: scale :math:`\gamma` for the programming noise.
        drift_nu_mean: mean :math:`\nu_\text{mean}` of the power-law drift
            coefficient :math:`\nu` (before ``drift_scale`` :math:`\zeta`
            is applied).
        drift_nu_std: device-to-device variability :math:`\nu_\text{std}`
            of the power-law drift coefficient (before ``drift_scale`` is
            applied).
        drift_scale: additional scale :math:`\zeta` applied to all drawn
            drift coefficients.
        t_0: parameter of the drift (first reading time), see above.

            Note:
                The ``t_inference`` is relative to this time ``t_0``,
                e.g. ``t_inference`` counts from the completion of the
                programming of a device.
        read_noise_scale: scale :math:`\rho` for scaling the read and
            accumulated :math:`1/f` noise.
        t_read: parameter of the :math:`1/f` noise (in seconds).
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        g_converter: Optional[BaseConductanceConverter] = None,
        g_max: Optional[float] = None,
        prog_coeff: Optional[List[float]] = None,
        prog_noise_scale: float = 1.0,
        drift_nu_mean: float = 0.1,
        drift_nu_std: float = 0.05,
        drift_scale: float = 1.0,
        t_0: float = 20.0,
        read_noise_scale: float = 1.0,
        t_read: float = 250.0e-9,
    ):
        g_converter = deepcopy(g_converter) or SinglePairConductanceConverter(g_max=g_max)
        super().__init__(g_converter)

        self.g_max = getattr(self.g_converter, "g_max", g_max)
        if self.g_max is None:
            raise ValueError("g_max cannot be established from g_converter")

        self.prog_coeff = [0.2, 0.0, 0.0] if prog_coeff is None else prog_coeff
        self.prog_noise_scale = prog_noise_scale
        self.drift_nu_mean = drift_nu_mean
        self.drift_nu_std = drift_nu_std
        self.drift_scale = drift_scale
        self.t_0 = t_0
        self.read_noise_scale = read_noise_scale
        self.t_read = t_read
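
    # Construction sketch (illustrative, not part of the original module):
    # passing only ``g_max`` falls back to a SinglePairConductanceConverter,
    # from which ``self.g_max`` is then read back via ``getattr`` above, e.g.:
    #
    #     model = StateIndependentNoiseModel(g_max=25.0)
    #     assert model.g_max == 25.0  # taken from the default converter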
    @no_grad()
    def apply_programming_noise_to_conductance(self, g_target: Tensor) -> Tensor:
        """Apply programming noise to a target conductance Tensor.

        Adds Gaussian noise whose standard deviation depends on the target
        conductance through a second-degree polynomial.
        """
        mat = 1
        sig_prog = self.prog_coeff[0]
        for coeff in self.prog_coeff[1:]:
            mat *= g_target / self.g_max
            sig_prog += mat * coeff

        g_prog = g_target + self.prog_noise_scale * sig_prog * randn_like(g_target)
        g_prog.clamp_(min=0.0)  # no negative conductances allowed

        return g_prog
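    # Worked example (sketch, arbitrary numbers): the loop above accumulates
    # powers of g_target / g_max, so with prog_coeff=[0.1, 0.2, 0.0] and
    # prog_noise_scale=1.0 it evaluates, at g_target == g_max,
    #
    #     sigma = 0.1 + 0.2 * (g_max / g_max) = 0.3 uS
    #
    # while the default [0.2, 0.0, 0.0] keeps sigma at a constant 0.2 uS
    # independent of g_target.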
    @no_grad()
    def generate_drift_coefficients(self, g_target: Tensor) -> Tensor:
        """Return drift coefficients ``nu``."""
        mu_drift = self.drift_nu_mean
        sig_drift = self.drift_nu_std
        nu_drift = torch_abs(mu_drift + sig_drift * randn_like(g_target)).clamp(min=0.0)

        return nu_drift * self.drift_scale
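    # Sketch (defaults assumed): with drift_nu_mean=0.1, drift_nu_std=0.05 and
    # drift_scale=1.0, each device draws nu = |0.1 + 0.05 * xi| for a
    # standard-normal xi, so the drawn coefficients cluster around 0.1 with a
    # device-to-device spread of about +/- 0.05, and the absolute value keeps
    # nu non-negative.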
    @no_grad()
    def apply_drift_noise_to_conductance(
        self, g_prog: Tensor, drift_noise_param: Tensor, t_inference: float
    ) -> Tensor:
        """Apply the noise and drift up to the assumed inference time point."""
        t = t_inference + self.t_0

        # drift
        if t > self.t_0:
            g_drift = g_prog * ((t / self.t_0) ** (-drift_noise_param))
        else:
            g_drift = g_prog

        # expected accumulated 1/f noise since start of programming at t = 0
        if t > 0:
            sig_noise = sqrt(numpy_log((t + self.t_read) / (2 * self.t_read)))
            g_final = g_drift + torch_abs(
                g_drift / self.g_max
            ) * self.read_noise_scale * sig_noise * randn_like(g_drift)
        else:
            g_final = g_prog

        return g_final.clamp(min=0.0)
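
# Minimal end-to-end sketch (illustrative only, not part of the original
# module; conductance values and inference time are arbitrary). It exercises
# the three methods above directly on a target conductance tensor:
#
#     from torch import full
#
#     model = StateIndependentNoiseModel(g_max=25.0)
#     g_target = full((3,), 12.5)  # target conductances, in uS
#     g_prog = model.apply_programming_noise_to_conductance(g_target)
#     nu = model.generate_drift_coefficients(g_target)
#     g_t = model.apply_drift_noise_to_conductance(g_prog, nu, t_inference=3600.0)
#
# ``g_t`` then holds the conductances as they would be read one hour
# (3600 s, counted from the completion of programming) after programming.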