Source code for tools.basicStats

#!/usr/bin/env python3

"""
.. module:: basicStats
   :synopsis: a module thats collects various statistical codes, that \
   are shared among other submodules.

.. moduleauthor:: Wolfgang Waltenberger <wolfgang.waltenberger@gmail.com>
"""

from scipy import stats, optimize
from smodels.tools.smodelsLogging import logger
from smodels.tools.physicsUnits import fb
from scipy.special import erf
import numpy as np
from smodels.experiment.exceptions import SModelSExperimentError as SModelSError
from typing import Text, Optional, Union

__all__ = [ "CLsfromNLL", "determineBrentBracket", "chi2FromLmax" ]

[docs]def CLsfromNLL(
    nllA: float, nll0A: float, nll: float, nll0: float,
    return_type: Text = "CLs-alpha" ) -> float:
    """
    compute the CLs - alpha from the NLLs
    TODO: following needs explanation

    :param nllA:
    :param nll0A:
    :param nll:
    :param nll0:
    :param return_type: (Text) can be "CLs-alpha", "1-CLs", "CLs" \
                        CLs-alpha: returns CLs - 0.05 \
                        1-CLs: returns 1-CLs value \
                        CLs: returns CLs value
    :return: Cls-type value, see above
    """
    assert return_type in ["CLs-alpha", "1-CLs", "CLs"], f"Unknown return type: {return_type}."
    qmu = 0.0 if 2 * (nll - nll0) < 0.0 else 2 * (nll - nll0)
    sqmu = np.sqrt(qmu)
    qA = 2 * (nllA - nll0A)
    if qA < 0.0:
        qA = 0.0
    sqA = np.sqrt(qA)
    if qA >= qmu:
        CLsb = 1.0 - stats.multivariate_normal.cdf(sqmu)
        CLb = stats.multivariate_normal.cdf(sqA - sqmu)
    else:
        CLsb = 1.0 if qA == 0.0 else 1.0 - stats.multivariate_normal.cdf((qmu + qA) / (2 * sqA))
        CLb = 1.0 if qA == 0.0 else 1.0 - stats.multivariate_normal.cdf((qmu - qA) / (2 * sqA))

    CLs = CLsb / CLb if CLb > 0 else 0.0

    if return_type == "1-CLs":
        return 1.0 - CLs
    elif return_type == "CLs":
        return CLs

    return CLs - 0.05

[docs]def determineBrentBracket(mu_hat, sigma_mu, rootfinder,
         allowNegative = True ):
    """find a, b for brent bracketing

    :param mu_hat: mu that maximizes likelihood
    :param sigm_mu: error on mu_hat (not too reliable)
    :param rootfinder: function that finds the root (usually root_func)
    :param allowNegative: if False, then do not allow a or b to become negative
    :returns: the interval a,b
    """
    sigma_mu = max(sigma_mu, 0.5)  # there is a minimum on sigma_mu
    sigma_mu = min(sigma_mu, 100.) # there is a maximum on sigma_mu
    # the root should be roughly at mu_hat + 2*sigma_mu
    a = mu_hat + 1.5 * sigma_mu
    ra = rootfinder(a)
    ntrials = 20
    i = 0
    foundExtra = False
    while ra < 0.0:
        # if this is negative, we move it to the left
        i += 1
        a -= (i**2.0) * sigma_mu
        ra = rootfinder(a)
        if i > ntrials or a < -10000.0 or ra is None or ( a < 0 and not allowNegative ):
            avalues = [0.0, 1.0, -1.0, 3.0, -3.0, 10.0, -10.0, 0.1, -0.1, 0.01, -0.01, .001, -.001, 100., -100., .0001, -.0001 ]
            if not allowNegative:
                avalues = [0.0, 1.0, 3.0, 10.0, 0.1, 0.01, .001, 100., .0001 ]
            for a in avalues:
                ra = rootfinder(a)
                if ra is None: # if cls computation failed, try with next a value
                    continue
                if ra > 0.0:
                    foundExtra = True
                    break
            if not foundExtra:
                logger.error(
                    f"cannot find an a that is left of the root. last attempt, a={a:.2f}, root = {ra:.2f}."
                )
                logger.error(f"mu_hat={mu_hat:.2f}, sigma_mu={sigma_mu:.2f}")
                raise SModelSError(
                    f"cannot find an a that is left of the root. last attempt, a={a:.2f}, root = {ra:.2f}."
                )
    i = 0
    foundExtra = False
    b = mu_hat + 2.5 * sigma_mu
    rb = rootfinder(b)
    while rb > 0.0:
        # if this is positive, we move it to the right
        i += 1
        b += (i**2.0) * sigma_mu
        rb = rootfinder(b)
        if rb is None: # if cls computation failed, try with a bigger b value
            continue
        closestr, closest = float("inf"), None
        if i > ntrials or ( b < 0 and not allowNegative ):
            bvalues = [1.0, 0.0, 3.0, -1.0, 10.0, -3.0, 0.1, -0.1, -10.0, 100.0, -100.0, 1000.0, .01, -.01, .001, -.001, 10000.0, 100000.0, 1000000.0 ]
            if not allowNegative:
                bvalues = [1.0, 0.0, 3.0, 10.0, 0.1, 100.0, 1000.0, .01, .001, 10000.0, 100000.0, 1000000.0 ]
            for b in bvalues:
                rb = rootfinder(b)
                if rb is None: # if cls computation failed, try with next b value
                    continue
                if rb < 0.0:
                    foundExtra = True
                    break
                if rb < closestr:
                    closestr = rb
                    closest = b
            if not foundExtra:
                logger.error(f"cannot find a b that is right of the root (i.e. rootfinder(b) < 0).")
                logger.error(f"closest to zero rootfinder({closest})={closestr}")
                logger.error(f"mu_hat was at {mu_hat:.2f} sigma_mu at {sigma_mu:.2f}")
                raise SModelSError()
    return a, b

def deltaChi2FromLlhd(likelihood):
    """compute the delta chi2 value from a likelihood (convenience function)"""
    if likelihood == 0.0:
        return 1e10  # a very high number is good
    elif likelihood is None:
        return None

    return -2.0 * np.log(likelihood)

[docs]def chi2FromLmax(llhd, lmax):
    """compute the chi2 from likelihood and lmax"""
    if llhd is None or lmax is None:
        return None
    chi2 = 0.0
    if llhd > 1e-200:
        from math import log

        chi2 = 2 * log(lmax / llhd)
    if chi2 < 0.0 and llhd < 1e-100:
        # numerical inaccuracy
        chi2 = 0.0
    return chi2