Source code for aihwkit.inference.noise.custom

# -*- coding: utf-8 -*-

# (C) Copyright 2020, 2021, 2022, 2023, 2024 IBM. All Rights Reserved.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Phenomenological noise model for inference."""

from copy import deepcopy
from typing import List, Optional

from numpy import log as numpy_log
from numpy import sqrt
from torch import abs as torch_abs
from torch import randn_like, Tensor
from torch.autograd import no_grad

from aihwkit.inference.noise.base import BaseNoiseModel
from aihwkit.inference.converter.base import BaseConductanceConverter
from aihwkit.inference.converter.conductance import SinglePairConductanceConverter


class StateIndependentNoiseModel(BaseNoiseModel):
    # pylint: disable=too-many-instance-attributes
    r"""Standard noise model that has a non-conductance-dependent drift and
    multiplicative read (:math:`1/f`) noise.

    Programming noise is state-independent by default; however, it can be
    made conductance-dependent, since the expected programming noise
    strength is in general modeled with a second-order polynomial.

    **Programming noise** is thus given by:

    .. math::

        \sigma_\text{programming noise} = \gamma \left(c_0
            + c_1 \frac{g_T}{g_\text{max}}
            + c_2 \frac{g_T^2}{g_\text{max}^2}\right)

    where :math:`\gamma` is an additional convenience scale and :math:`g_T`
    is the target conductance established with the given ``g_converter``
    from the weight matrix.

    The default programming noise is constant (state-independent):
    :math:`c_0 = 0.2\,\mu\mathrm{S}` with the other coefficients set to
    :math:`0.0`.

    **Drift** is computed for each device as

    .. math::

        g_\text{drift}(t) = g_\text{prog} \left(t / t_0\right)^{-\nu}

    with the drift coefficient determined once for each device by

    .. math::

        \nu = \zeta \, |\nu_\text{mean} + \nu_\text{std}\,\xi|_+

    where :math:`\xi` is a Gaussian random number and :math:`|\cdot|_+`
    rectifies negative values to zero. :math:`\zeta` is an additional drift
    scale.

    **Read noise** is given by

    .. math::

        \sigma_\text{read} = \rho\,\frac{g_\text{drift}(t)}{g_\text{max}}
        \sqrt{\log\left(\frac{t + t_\text{read}}{2\,t_\text{read}}\right)}

    This :math:`\sigma_\text{read}` is then used to add Gaussian noise of
    this magnitude to the drifted conductance. The read noise scale
    :math:`\rho` can be used to scale the read noise.

    Args:
        g_converter: instantiated class of the conductance converter
            (defaults to single pair).
        g_max: In :math:`\mu S`, the maximal conductance, i.e. the value
            that the absolute max of the weights will be mapped to.
        prog_coeff: programming polynomial coefficients :math:`c_i` in
            :math:`\mu S`. Default is constant :math:`c_0 = 0.2` with the
            other coefficients set to 0.0.
        prog_noise_scale: scale :math:`\gamma` for the programming noise.
        drift_nu_mean: mean :math:`\nu_\text{mean}` of the power-law drift
            coefficient :math:`\nu` (before ``drift_scale`` :math:`\zeta`
            is applied).
        drift_nu_std: device-to-device variability :math:`\nu_\text{std}`
            of the power-law drift coefficient (before ``drift_scale`` is
            applied).
        drift_scale: additional scale :math:`\zeta` applied to all drawn
            drift coefficients.
        t_0: parameter of the drift (first reading time), see above.

            Note:
                The ``t_inference`` is relative to this time ``t_0``,
                e.g. ``t_inference`` counts from the completion of the
                programming of a device.
        read_noise_scale: scale :math:`\rho` for scaling the read and
            accumulated :math:`1/f` noise.
        t_read: parameter of the :math:`1/f` noise (in seconds).
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        g_converter: Optional[BaseConductanceConverter] = None,
        g_max: Optional[float] = None,
        prog_coeff: Optional[List[float]] = None,
        prog_noise_scale: float = 1.0,
        drift_nu_mean: float = 0.1,
        drift_nu_std: float = 0.05,
        drift_scale: float = 1.0,
        t_0: float = 20.0,
        read_noise_scale: float = 1.0,
        t_read: float = 250.0e-9,
    ):
        g_converter = deepcopy(g_converter) or SinglePairConductanceConverter(g_max=g_max)
        super().__init__(g_converter)

        self.g_max = getattr(self.g_converter, "g_max", g_max)
        if self.g_max is None:
            raise ValueError("g_max cannot be established from g_converter")

        self.prog_coeff = [0.2, 0.0, 0.0] if prog_coeff is None else prog_coeff
        self.prog_noise_scale = prog_noise_scale
        self.drift_nu_mean = drift_nu_mean
        self.drift_nu_std = drift_nu_std
        self.drift_scale = drift_scale
        self.t_0 = t_0
        self.read_noise_scale = read_noise_scale
        self.t_read = t_read
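
    # Construction sketch (illustrative, not part of the original module):
    # passing only ``g_max`` falls back to a SinglePairConductanceConverter,
    # from which ``self.g_max`` is then read back via ``getattr`` above, e.g.:
    #
    #     model = StateIndependentNoiseModel(g_max=25.0)
    #     assert model.g_max == 25.0  # taken from the default converter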
    @no_grad()
    def apply_programming_noise_to_conductance(self, g_target: Tensor) -> Tensor:
        """Apply programming noise to a target conductance Tensor.

        Adds Gaussian noise whose standard deviation depends on the target
        conductance through a second-degree polynomial.
        """
        mat = 1
        sig_prog = self.prog_coeff[0]
        for coeff in self.prog_coeff[1:]:
            mat *= g_target / self.g_max
            sig_prog += mat * coeff

        g_prog = g_target + self.prog_noise_scale * sig_prog * randn_like(g_target)
        g_prog.clamp_(min=0.0)  # no negative conductances allowed

        return g_prog
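    # Worked example (sketch, arbitrary numbers): the loop above accumulates
    # powers of g_target / g_max, so with prog_coeff=[0.1, 0.2, 0.0] and
    # prog_noise_scale=1.0 it evaluates, at g_target == g_max,
    #
    #     sigma = 0.1 + 0.2 * (g_max / g_max) = 0.3 uS
    #
    # while the default [0.2, 0.0, 0.0] keeps sigma at a constant 0.2 uS
    # independent of g_target.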
    @no_grad()
    def generate_drift_coefficients(self, g_target: Tensor) -> Tensor:
        """Return drift coefficients ``nu``."""
        mu_drift = self.drift_nu_mean
        sig_drift = self.drift_nu_std
        nu_drift = torch_abs(mu_drift + sig_drift * randn_like(g_target)).clamp(min=0.0)

        return nu_drift * self.drift_scale
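    # Sketch (defaults assumed): with drift_nu_mean=0.1, drift_nu_std=0.05 and
    # drift_scale=1.0, each device draws nu = |0.1 + 0.05 * xi| for a
    # standard-normal xi, so the drawn coefficients cluster around 0.1 with a
    # device-to-device spread of about +/- 0.05, and the absolute value keeps
    # nu non-negative.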
    @no_grad()
    def apply_drift_noise_to_conductance(
        self, g_prog: Tensor, drift_noise_param: Tensor, t_inference: float
    ) -> Tensor:
        """Apply the noise and drift up to the assumed inference time point."""
        t = t_inference + self.t_0

        # drift
        if t > self.t_0:
            g_drift = g_prog * ((t / self.t_0) ** (-drift_noise_param))
        else:
            g_drift = g_prog

        # expected accumulated 1/f noise since start of programming at t = 0
        if t > 0:
            sig_noise = sqrt(numpy_log((t + self.t_read) / (2 * self.t_read)))
            g_final = g_drift + torch_abs(
                g_drift / self.g_max
            ) * self.read_noise_scale * sig_noise * randn_like(g_drift)
        else:
            g_final = g_prog

        return g_final.clamp(min=0.0)
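
# Minimal end-to-end sketch (illustrative only, not part of the original
# module; conductance values and inference time are arbitrary). It exercises
# the three methods above directly on a target conductance tensor:
#
#     from torch import full
#
#     model = StateIndependentNoiseModel(g_max=25.0)
#     g_target = full((3,), 12.5)  # target conductances, in uS
#     g_prog = model.apply_programming_noise_to_conductance(g_target)
#     nu = model.generate_drift_coefficients(g_target)
#     g_t = model.apply_drift_noise_to_conductance(g_prog, nu, t_inference=3600.0)
#
# ``g_t`` then holds the conductances as they would be read one hour
# (3600 s, counted from the completion of programming) after programming.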