Source code for elisa.plot.residuals

"""Residual calculation."""

from __future__ import annotations

import numpy as np
from numpy.typing import NDArray
from scipy.special import xlogy
from scipy.stats import chi2, norm, poisson


[docs] def combine_residuals( r1: NDArray, r2: NDArray, dof: int | float | NDArray, sign: NDArray | None = None, ) -> NDArray: if isinstance(dof, float | int): dof = float(dof) else: dof = np.asarray(dof, float) if not (dof.shape == r1.shape == r2.shape): raise ValueError('shape of r1, r2, and dof should be the same') if sign is None: sign = 1.0 r2_2d = r1 * r1 + r2 * r2 if dof == 1.0: return sign * np.sqrt(r2_2d) mask = r2_2d <= dof r = np.empty(r1.shape) if isinstance(dof, float): r[mask] = norm.isf(0.5 - 0.5 * chi2.cdf(r2_2d[mask], dof)) r[~mask] = norm.isf(0.5 * chi2.sf(r2_2d[~mask], dof)) else: r[mask] = norm.isf(0.5 - 0.5 * chi2.cdf(r2_2d[mask], dof[mask])) r[~mask] = norm.isf(0.5 * chi2.sf(r2_2d[~mask], dof[~mask])) return sign * r
[docs] def pearson_residuals( observed: NDArray, expected: NDArray, std: NDArray | None = None, ) -> NDArray: """Calculate Pearson residuals. Parameters ---------- observed : ndarray Observed counts. expected : ndarray Expected counts. std : ndarray, optional Standard deviation of the observed counts. If None, assumed to be the square root of the expected counts. Returns ------- ndarray Pearson residuals. """ if std is None: std = np.sqrt(expected) return (observed - expected) / std
[docs] def pit_poisson( k: NDArray, lam: NDArray, minus: bool = False, ) -> NDArray | tuple[NDArray, NDArray]: """Probability integral transform of poisson data fit. Parameters ---------- k : ndarray The data values. lam : ndarray The model values. minus : bool, optional Whether to calculate the PIT of `k` - 1. Returns ------- ndarray, or tuple of ndarray The probability integral transform values. """ pit = poisson.cdf(k, lam) if not minus: return pit else: pit_minus = poisson.cdf(k - 1, lam) return pit_minus, pit
[docs] def quantile_residuals_poisson( k: NDArray, lam: NDArray, keep_sign: bool = False, random: bool = True, seed: int = 42, ) -> NDArray: """Normalized quantile residuals for fit of Poisson data. Parameters ---------- k : ndarray Data value. lam : ndarray Model value. keep_sign : bool, optional Whether to keep sign of the residuals as ``sign(k - lam)``. The default is False. random : bool, optional Whether to add random noise so that residuals are normally distributed. seed : int, optional Random seed to use in adding noise. The default is 42. Returns ------- ndarray The quantile residuals. Notes ----- When `random` is True, the residuals are theoretically guaranteed to be normally distributed. If `keep_sign` is also True, then the normality of the residuals may not be preserved. The recommended use is only `random` or `keep_sign` being True. References ---------- .. [1] doi:10.2307/1390802 .. [2] doi:10.1371/journal.pone.0181790 .. [3] doi:10.1111/j.1541-0420.2009.01191.x .. [4] doi:10.1186/s12874-020-01055-2 """ k_shape = k.shape lam_shape = lam.shape assert k_shape == lam_shape[-len(k_shape) :] if k_shape != lam_shape: k = np.full(lam_shape, k) mask1 = k < lam mask2 = ~mask1 k1 = k[mask1] k2 = k[mask2] lam1 = lam[mask1] lam2 = lam[mask2] cdf = poisson.cdf(k1, lam1) sf = poisson.sf(k2, lam2) if random: # randomized probability integral transform rng = np.random.default_rng(seed) cdf_right = cdf cdf_left = poisson.cdf(k1 - 1, lam1) # when k < 0, poisson cdf is 0 sf_right = sf sf_left = poisson.sf(k2 - 1, lam2) # when k < 0, poisson sf is 1 cdf = rng.uniform(cdf_left, cdf_right) sf = rng.uniform(sf_right, sf_left) r = np.empty(lam_shape) r[mask1] = np.where( cdf != 0.0, norm.ppf(cdf), (k1 - lam1) / np.sqrt(lam1), ) r[mask2] = np.where( sf != 0.0, norm.isf(sf), (k2 - lam2) / np.sqrt(lam2), ) if keep_sign: r = np.where(k >= lam, 1.0, -1.0) * np.abs(r) return r
[docs] def pit_poisson_poisson( k1: NDArray, k2: NDArray, lam1: NDArray, lam2: NDArray, ratio: float | NDArray, seed: int = 42, minus: bool = False, nsim: int = 10000, ) -> NDArray | tuple[NDArray, NDArray]: """Probability integral transform of two poisson data fit. Parameters ---------- k1, k2 : ndarray The first and second data values. lam1, lam2 : ndarray The first and second model values. ratio : float or ndarray Background ratio. seed : int, optional Random seed to use in simulation. The default is 42. minus : bool, optional Whether to calculate the PIT of the next small quantity given `m1` and `m2`. The default is False. nsim : int, optional The number of simulations to generate. The default is 10000. Returns ------- ndarray, or tuple of ndarray The probability integral transform values. """ k_shape = np.shape(k1) lam_shape = np.shape(lam1) assert k_shape == np.shape(k2) assert lam_shape == np.shape(lam2) assert k_shape == lam_shape[-len(k_shape) :] obs = k1 - ratio * k2 rng = np.random.default_rng(seed) sim = rng.poisson(lam1, size=(nsim,) + lam_shape) - ratio * rng.poisson( lam2, size=(nsim,) + lam_shape ) pit = np.count_nonzero(sim <= obs, axis=0) / nsim if not minus: return pit else: obs_minus = np.maximum(k1 - 1.0 - ratio * k2, k1 - ratio * (k2 + 1.0)) pit_minus = np.count_nonzero(sim <= obs_minus, axis=0) / nsim return pit_minus, pit
[docs] def quantile_residuals_poisson_poisson( k1: NDArray, k2: NDArray, lam1: NDArray, lam2: NDArray, ratio: float | NDArray, random: bool = True, seed: int = 42, nsim: int = 10000, ) -> tuple[NDArray, NDArray, NDArray]: """Normalized quantile residuals for joint fit of two Poisson data. .. note:: The calculation is based on Monte Carlo simulation. It is also possible to calculate the quantile residuals by inverting the Cornish-Fisher expansion. See the following link for more details: * https://stats.stackexchange.com/a/73070 * https://www.value-at-risk.net/the-cornish-fisher-expansion/ * https://faculty.washington.edu/ezivot/econ589/ssrn-id1997178.pdf Parameters ---------- k1, k2 : ndarray The first and second data values. lam1, lam2 : ndarray The first and second model values. ratio : float or ndarray Background ratio. random : bool, optional Whether to add random noise so that residuals are normally distributed. seed : int, optional Random seed to use in adding noise and simulation. The default is 42. nsim : int, optional The number of simulations to generate. The default is 10000. Returns ------- tuple of ndarray The quantile residuals, and flags to mark if the residuals are lower or upper limit. """ res = pit_poisson_poisson(k1, k2, lam1, lam2, ratio, seed, random, nsim) if random: pit = np.random.default_rng(seed).uniform(*res) else: pit = res r = norm.ppf(pit) lower_mask = pit == 0.0 upper_mask = pit == 1.0 r[lower_mask] = norm.ppf(1.0 / nsim) r[upper_mask] = norm.ppf(1.0 - 1.0 / nsim) lower = np.full(lam1.shape, False) upper = np.full(lam1.shape, False) lower[lower_mask] = True upper[upper_mask] = True return r, lower, upper
[docs] def pit_poisson_normal( k: NDArray, lam: NDArray, v: NDArray, mu: NDArray, sigma: NDArray, ratio: float | NDArray, seed: int = 42, nsim: int = 10000, ) -> NDArray: """Probability integral transform of poisson and normal data fit. Parameters ---------- k : ndarray The Poisson data value. lam : ndarray The Poisson model value. v : ndarray The normal data value. mu : ndarray The normal model value. sigma : ndarray The normal model sigma. ratio : float or ndarray Background ratio. seed : int, optional Random seed to use in simulation. The default is 42. nsim : int, optional The number of simulations to generate. The default is 10000. Returns ------- ndarray, or tuple of ndarray The probability integral transform values. """ k_shape = np.shape(k) lam_shape = np.shape(lam) assert k_shape == np.shape(v) assert lam_shape == np.shape(mu) assert k_shape == np.shape(sigma) == lam_shape[-len(k_shape) :] obs = k - ratio * v rng = np.random.default_rng(seed) sim = rng.poisson(lam, size=(nsim,) + lam_shape) - ratio * rng.normal( mu, sigma, size=(nsim,) + lam_shape ) pit = np.count_nonzero(sim <= obs, axis=0) / nsim return pit
[docs] def quantile_residuals_poisson_normal( k: NDArray, lam: NDArray, v: NDArray, mu: NDArray, sigma: NDArray, ratio: float | NDArray, seed: int = 42, nsim: int = 10000, ) -> tuple[NDArray, NDArray, NDArray]: """Normalized quantile residuals for joint fit of Poisson and normal data. .. note:: The calculation is based on Monte Carlo simulation. It is also possible to calculate the quantile residuals by inverting the Cornish-Fisher expansion. See the following link for more details: * https://stats.stackexchange.com/a/73070 * https://www.value-at-risk.net/the-cornish-fisher-expansion/ * https://faculty.washington.edu/ezivot/econ589/ssrn-id1997178.pdf Parameters ---------- k : ndarray The Poisson data value. lam : ndarray The Poisson model value. v : ndarray The normal data value. mu : ndarray The normal model value. sigma : ndarray The normal model sigma. ratio : float or ndarray Background ratio. seed : int, optional Random seed to use in simulation. The default is 42. nsim : int, optional The number of simulations to generate. The default is 10000. Returns ------- tuple of ndarray The quantile residuals, and flags to mark if the residuals are lower or upper limit. """ pit = pit_poisson_normal(k, lam, v, mu, sigma, ratio, seed, nsim) r = norm.ppf(pit) lower_mask = pit == 0.0 upper_mask = pit == 1.0 r[lower_mask] = norm.ppf(1.0 / nsim) r[upper_mask] = norm.ppf(1.0 - 1.0 / nsim) lower = np.full(lam.shape, False) upper = np.full(lam.shape, False) lower[lower_mask] = True upper[upper_mask] = True return r, lower, upper
[docs] def deviance_residuals_poisson(k: NDArray, lam: NDArray) -> NDArray: """Deviance residuals for fit of Poisson data. Parameters ---------- k : ndarray Data value. lam : ndarray Model value. Returns ------- ndarray The deviance residuals. References ---------- .. [1] McCullagh P, Nelder JA. Generalized Linear Models, 2nd ed., pp. 39 .. [2] Spiegelhalter et al., https://doi.org/10.1111/1467-9868.00353 """ deviance = 2.0 * (xlogy(k, k / lam) - (k - lam)) return np.where(k >= lam, 1.0, -1.0) * np.sqrt(deviance)
[docs] def deviance_residuals_poisson_poisson( k1: NDArray, k2: NDArray, lam1: NDArray, lam2: NDArray, sign: NDArray | None = None, dof: int | float | NDArray = 2.0, ) -> NDArray: """Deviance residuals [1]_ [2]_ for joint fit of two Poisson data. Parameters ---------- k1, k2 : ndarray The first and second data value. lam1, lam2 : ndarray The first and second model value. sign : ndarray, optional The sign of output residuals. The default is None. dof : int, float, or NDArray, optional The degree of freedom of each data pair. The default is 2. Returns ------- ndarray The deviance residuals. References ---------- .. [1] McCullagh P, Nelder JA. Generalized Linear Models, 2nd ed., pp. 39 .. [2] Spiegelhalter et al., https://doi.org/10.1111/1467-9868.00353 """ d = 2.0 * (xlogy(k1, k1 / lam1) - (k1 - lam1)) d += 2.0 * (xlogy(k2, k2 / lam2) - (k2 - lam2)) if np.all(dof == 1.0): r = np.sqrt(d) else: if isinstance(dof, float | int): dof = np.full(k1.shape, dof) mask = d <= dof r = np.empty(k1.shape) r[mask] = norm.ppf(chi2.cdf(d[mask], dof[mask])) r[~mask] = norm.isf(chi2.sf(d[~mask], dof[~mask])) if sign is not None: r = sign * np.abs(r) return r
[docs] def deviance_residuals_poisson_normal( k: NDArray, lam: NDArray, v: NDArray, mu: NDArray, sigma: NDArray, sign: NDArray | None = None, dof: int | float | NDArray = 2.0, ) -> NDArray: """Deviance residuals [1]_ [2]_ for joint fit of Poisson and normal data. Parameters ---------- k : ndarray The first data value. lam : ndarray The first model value. v : ndarray Data value. mu : ndarray Model value. sigma : ndarray Model sigma. sign : ndarray, optional The sign of output residuals. The default is None. dof : int, float, or NDArray, optional The degree of freedom of each data pair. The default is 2. Returns ------- ndarray The deviance residuals. References ---------- .. [1] McCullagh P, Nelder JA. Generalized Linear Models, 2nd ed., pp. 39 .. [2] Spiegelhalter et al., https://doi.org/10.1111/1467-9868.00353 """ d = 2.0 * (xlogy(k, k / lam) - (k - lam)) v = (v - mu) / sigma d += v * v if np.all(dof == 1.0): r = np.sqrt(d) else: if isinstance(dof, float | int): dof = np.full(k.shape, dof) mask = d <= dof r = np.empty(k.shape) r[mask] = norm.ppf(chi2.cdf(d[mask], dof[mask])) r[~mask] = norm.isf(chi2.sf(d[~mask], dof[~mask])) if sign is not None: r = sign * np.abs(r) return r