# Source code for aihwkit.inference.noise.custom

# -*- coding: utf-8 -*-

# (C) Copyright 2020, 2021, 2022, 2023, 2024 IBM. All Rights Reserved.
#
# Licensed under the MIT license. See LICENSE file in the project root for details.

"""Phenomenological noise model for inference."""

from copy import deepcopy
from typing import List, Optional

from numpy import log as numpy_log
from numpy import sqrt
from torch import abs as torch_abs
from torch import randn_like, Tensor
from torch.autograd import no_grad

from aihwkit.inference.noise.base import BaseNoiseModel
from aihwkit.inference.converter.base import BaseConductanceConverter
from aihwkit.inference.converter.conductance import SinglePairConductanceConverter


class StateIndependentNoiseModel(BaseNoiseModel):  # pylint: disable=too-many-instance-attributes
    r"""Standard noise model that has a non-conductance dependent drift and
    multiplicative read (1/f) noise.

    Programming noise is state-independent by default, however, it can
    be made conductance dependent, since the expected programming noise
    strength is modeled with a polynomial (second-order by default).

    **Programming noise** is thus given by:

    .. math::

        \sigma_\text{programming noise} = \gamma\,\left(c_0 + c_1
        \frac{g_T}{g_\text{max}} + c_2 \frac{g_T^2}{g_\text{max}^2}\right)

    where :math:`\gamma` is an additional convenience scale and
    :math:`g_T` is the target conductance established from the given
    ``g_converter`` from the weight matrix.

    The default programming noise is constant (state independent):
    :math:`c_0=0.2\mu\mathrm{S}` and the other coefficients set to
    :math:`0.0`.

    **Drift** is computed for each device as

    .. math::

        g_\text{drift}(t) = g_\text{prog}(t / t_0) ^{- \nu}

    with the drift coefficient determined at the beginning for each
    device with

    .. math::

        \nu = \zeta\, |\nu_\text{mean} + \nu_\text{std}\xi|

    where :math:`\xi` is a Gaussian random number and :math:`\zeta` is
    an additional drift scale.

    Note:
        The implementation takes the absolute value of the drawn
        coefficient, so negative draws are mirrored to positive values
        (they are not set to zero).

    **Read noise** is given by

    .. math::

        \sigma_\text{read} = \rho \frac{g_\text{drift}(t)}{g_\text{max}}
        \sqrt{\log\left(\frac{t + t_\text{read}}{2 t_\text{read}}\right)}

    This :math:`\sigma_\text{read}` is then used to add Gaussian noise
    of this magnitude to the drifted conductance. The read noise scale
    :math:`\rho` can be used to scale the read noise. For
    :math:`t < t_\text{read}` the logarithm would be negative; the read
    noise is set to zero in that case.

    Args:
        g_converter: instantiated class of the conductance converter
            (defaults to single pair). A deep copy of the given
            converter is stored.
        g_max: In :math:`\mu S`, the maximal conductance, ie the value
            the absolute max of the weights will be mapped to. Only
            used when ``g_converter`` is not given or when the converter
            has no ``g_max`` attribute.
        prog_coeff: programming polynomial coefficients :math:`c_i` in
            :math:`\mu S`. Default is constant :math:`c_0=0.2` and
            other coefficients set to 0.0.
        prog_noise_scale: scale :math:`\gamma` for the programming noise
        drift_nu_mean: mean :math:`\nu_\text{mean}` of power-law drift
            coefficient (:math:`\nu`) (before ``drift_scale``
            :math:`\zeta` is applied).
        drift_nu_std: device-to-device variability
            :math:`\nu_\text{std}` of the power-law drift coefficient
            (before ``drift_scale`` is applied)
        drift_scale: additional scale :math:`\zeta` applied to all
            drawn drift coefficients
        t_0: parameter of the drift (first reading time), see above.

            Note:
                The ``t_inference`` is relative to this time ``t0``
                e.g. ``t_inference`` counts from the completion of the
                programming of a device.
        read_noise_scale: scale :math:`\rho` for scaling the read and
            accumulated noise :math:`1/f`.
        t_read: parameter of the :math:`1/f` noise (in seconds)

    Raises:
        ValueError: if ``g_max`` can neither be read from the converter
            nor is given explicitly.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        g_converter: Optional[BaseConductanceConverter] = None,
        g_max: Optional[float] = None,
        prog_coeff: Optional[List[float]] = None,
        prog_noise_scale: float = 1.0,
        drift_nu_mean: float = 0.1,
        drift_nu_std: float = 0.05,
        drift_scale: float = 1.0,
        t_0: float = 20.0,
        read_noise_scale: float = 1.0,
        t_read: float = 250.0e-9,
    ):
        # Test for ``None`` explicitly instead of relying on the truthiness of
        # the converter object (a converter defining ``__bool__``/``__len__``
        # must not be silently replaced by the default).
        if g_converter is None:
            g_converter = SinglePairConductanceConverter(g_max=g_max)
        else:
            g_converter = deepcopy(g_converter)
        super().__init__(g_converter)

        # Prefer the ``g_max`` of the converter; fall back to the argument.
        self.g_max = getattr(self.g_converter, "g_max", g_max)
        if self.g_max is None:
            raise ValueError("g_max cannot be established from g_converter")

        self.prog_coeff = [0.2, 0.0, 0.0] if prog_coeff is None else prog_coeff
        self.prog_noise_scale = prog_noise_scale
        self.drift_nu_mean = drift_nu_mean
        self.drift_nu_std = drift_nu_std
        self.drift_scale = drift_scale
        self.t_0 = t_0
        self.read_noise_scale = read_noise_scale
        self.t_read = t_read

    @no_grad()
    def apply_programming_noise_to_conductance(self, g_target: Tensor) -> Tensor:
        """Apply programming noise to a target conductance Tensor.

        Programming noise is additive Gaussian noise whose standard
        deviation depends on the conductance through the polynomial
        given by ``prog_coeff`` (evaluated in ``g_target / g_max``).

        Args:
            g_target: target conductances

        Returns:
            Noisy conductances, clamped to be non-negative.
        """
        mat = 1
        sig_prog = self.prog_coeff[0]
        for coeff in self.prog_coeff[1:]:
            # ``mat`` holds the current power of the relative conductance
            mat *= g_target / self.g_max
            sig_prog += mat * coeff

        g_prog = g_target + self.prog_noise_scale * sig_prog * randn_like(g_target)
        g_prog.clamp_(min=0.0)  # no negative conductances allowed

        return g_prog

    @no_grad()
    def generate_drift_coefficients(self, g_target: Tensor) -> Tensor:
        """Return drift coefficients ``nu``.

        Args:
            g_target: target conductances; one coefficient is drawn per
                element (only shape, dtype and device are used).

        Returns:
            Non-negative drift coefficients scaled by ``drift_scale``.
        """
        mu_drift = self.drift_nu_mean
        sig_drift = self.drift_nu_std

        # NOTE(review): ``abs`` already makes every value non-negative, so the
        # ``clamp`` below is a no-op and negative draws are mirrored rather
        # than rectified to zero. Kept as is to preserve the sampled
        # distribution; confirm which of the two is intended.
        nu_drift = torch_abs(mu_drift + sig_drift * randn_like(g_target)).clamp(min=0.0)

        return nu_drift * self.drift_scale

    @no_grad()
    def apply_drift_noise_to_conductance(
        self, g_prog: Tensor, drift_noise_param: Tensor, t_inference: float
    ) -> Tensor:
        """Apply the noise and drift up to the assumed inference time point.

        Args:
            g_prog: programmed conductances
            drift_noise_param: drift coefficients ``nu`` as returned by
                :meth:`generate_drift_coefficients`
            t_inference: time of inference, counted from the completion
                of the programming (``t_0`` is added internally)

        Returns:
            Drifted and read-noise perturbed conductances, clamped to be
            non-negative.
        """
        t = t_inference + self.t_0

        # drift
        if t > self.t_0:
            g_drift = g_prog * ((t / self.t_0) ** (-drift_noise_param))
        else:
            g_drift = g_prog

        # expected accumulated 1/f noise since start of programming at t=0
        if t > 0:
            # For t < t_read the ratio drops below 1 and the logarithm turns
            # negative, which would make the square root (and thereby all
            # conductances) NaN. Floor the ratio at 1, i.e. zero read noise.
            time_ratio = max((t + self.t_read) / (2 * self.t_read), 1.0)
            sig_noise = sqrt(numpy_log(time_ratio))
            g_final = g_drift + torch_abs(
                g_drift / self.g_max
            ) * self.read_noise_scale * sig_noise * randn_like(g_drift)
        else:
            g_final = g_prog

        return g_final.clamp(min=0.0)