#!/usr/bin/env python3
"""
.. module:: truncatedGaussian
:synopsis: a module that contains the code that computes an approximate
Gaussian likelihood from an expected and an observed upper limit. See
https://arxiv.org/abs/1202.3415.
.. moduleauthor:: Wolfgang Waltenberger <wolfgang.waltenberger@gmail.com>
"""
__all__ = [ "TruncatedGaussians" ]
from scipy import stats, optimize
from smodels.base.smodelsLogging import logger
from smodels.base.physicsUnits import fb
from scipy.special import erf
import numpy as np
from smodels.statistics.exceptions import SModelSStatisticsError as SModelSError
from typing import Text, Optional, Union, Dict
from smodels.statistics.basicStats import deltaChi2FromLlhd
class TruncatedGaussians:
    """ likelihood computer based on the truncated Gaussian approximation, see
    arXiv:1202.3415 """
__slots__ = [ "upperLimitOnMu", "expectedUpperLimitOnMu", "corr",
"sigma_mu", "denominator", "cl" ]
    # the correction we introduced a long time ago was meant for exclusions
    # only; for discovery mode we would need to correct differently
newCorrectionType = False
def __init__ ( self, upperLimitOnMu : float, expectedUpperLimitOnMu : float,
corr : Optional[float] = 0.6, cl=.95 ):
"""
:param upperLimitOnMu: observed upper limit on signal strength mu
:param expectedUpperLimitOnMu: expected upper limit on signal strength mu
:param corr: correction factor:
ULexp_mod = ULexp / (1. - corr*((ULobs-ULexp)/(ULobs+ULexp)))
When comparing with likelihoods constructed from efficiency maps,
a factor of corr = 0.6 has been found to result in the best approximations.
:param cl: confidence level
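
        A worked example of the correction (the numbers are hypothetical):
        for ULobs = 2.0, ULexp = 1.0 and corr = 0.6, the factor is
        f = 1. - 0.6*(2.0-1.0)/(2.0+1.0) = 0.8, so ULexp_mod = 1.0/0.8 = 1.25.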
"""
        assert type(upperLimitOnMu) in [ float, np.float64, np.float32 ], \
            f"the upper limits must be given as floats, not {type(upperLimitOnMu)}; are you providing upper limits on xsecs?"
# the old type of correction
if not self.newCorrectionType and corr > 0.0 and upperLimitOnMu > expectedUpperLimitOnMu:
f = 1.0 - corr * ((upperLimitOnMu - expectedUpperLimitOnMu) / (upperLimitOnMu + expectedUpperLimitOnMu))
expectedUpperLimitOnMu = expectedUpperLimitOnMu / f
self.upperLimitOnMu = upperLimitOnMu
self.expectedUpperLimitOnMu = expectedUpperLimitOnMu
self.corr = corr
self.sigma_mu = self._getSigmaMu() # the expected scale, eq 3.24 in arXiv:1202.3415
self.denominator = np.sqrt(2.0) * self.sigma_mu
self.cl = cl
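
    # A minimal usage sketch (the limits below are hypothetical):
    #
    #     tg = TruncatedGaussians ( upperLimitOnMu = 1.8,
    #                               expectedUpperLimitOnMu = 1.2 )
    #     llhd = tg.likelihood ( mu = 1.0 ) # approximate likelihood at mu = 1
    #     d = tg.lmax() # dictionary with "muhat", "sigma_mu", "lmax"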
    def likelihood ( self, mu : Union[float,None], return_nll : Optional[bool]=False,
allowNegativeSignals : Optional[bool] = True,
corr : Optional[float] = 0.6,
expected : Union[Text,bool] = False ) -> Union[None,float]:
""" return the likelihood, as a function of mu
:param mu: number of signal events, if None then mu = muhat
:param return_nll: if True, return negative log likelihood
:param allowNegativeSignals: if True, then allow muhat to become negative,\
else demand that muhat >= 0. In the presence of underfluctuations\
in the data, setting this to True results in more realistic\
approximate likelihoods.
:returns: likelihood (float)
"""
sllhd = "llhd"
if return_nll:
sllhd = "nll"
muhat, sigma_mu = float("inf"), float("inf")
dsig = self._likelihoodOfMu ( mu, return_nll=return_nll,
allowNegativeSignals = allowNegativeSignals, corr = corr,
expected = expected )
ret = dsig[sllhd]
return ret
    def lmax ( self, return_nll : Optional[bool]=False,
allowNegativeSignals : Optional[bool] = True,
corr : Optional[float] = 0.6,
expected : Union[bool,Text] = False ) -> Dict:
"""
Return the likelihood, as a function of mu
:param mu: number of signal events, if None then mu = muhat
:param return_nll: if True, return negative log likelihood
:param allowNegativeSignals: if True, then allow muhat to become negative,
else demand that muhat >= 0. In the presence of underfluctuations
in the data, setting this to True results in more realistic
approximate likelihoods.
:returns: dictionary with likelihood (float), muhat, and sigma_mu
"""
default = { "muhat": None, "sigma_mu": None, "lmax": None }
sllhd = "llhd"
if return_nll:
sllhd = "nll"
muhat, sigma_mu = float("inf"), float("inf")
dsig = self._likelihoodOfMu ( 1., return_nll=return_nll,
allowNegativeSignals = allowNegativeSignals, corr = corr )
muhat, sigma_mu = dsig["muhat"], dsig["sigma_mu"]
# llhd evaluated at mu_hat
if expected:
muhat = 0.
lmax = self.likelihood ( muhat, return_nll=return_nll )
ret = { "muhat": muhat, "sigma_mu": sigma_mu, "lmax": lmax }
return ret
def _likelihoodOfMu ( self, mu : Union[float,None],
return_nll : Optional[bool] = False,
allowNegativeSignals : Optional[bool] = True,
corr : Optional[float] = 0.6,
                        expected : Union[bool,Text] = False ) -> Dict:
""" return the likelihood, as a function of nsig
:param mu: signal strength
:param return_nll: if True, return negative log likelihood
:param allowNegativeSignals: if True, then allow muhat to become negative,
else demand that muhat >= 0. In the presence of underfluctuations
in the data, setting this to True results in more realistic
approximate likelihoods.
:param corr: correction factor:
ULexp_mod = ULexp / (1. - corr*((ULobs-ULexp)/(ULobs+ULexp)))
When comparing with likelihoods constructed from efficiency maps,
a factor of corr = 0.6 has been found to result in the best approximations.
        :returns: dictionary with likelihood (float), muhat, and sigma_mu
"""
sllhd = "llhd"
if return_nll:
sllhd = "nll"
if self.upperLimitOnMu < self.expectedUpperLimitOnMu:
## underfluctuation. muhat = 0.
if allowNegativeSignals:
xa = -self.expectedUpperLimitOnMu
xb = 1
muhat = 0.
                if expected is False:
muhat = self._findMuhat( xa, xb )
ret = self._computeLlhd(mu, muhat, return_nll = return_nll )
return { sllhd: ret, "muhat": muhat, "sigma_mu": self.sigma_mu }
else:
ret = self._computeLlhd(mu, 0.0, return_nll = return_nll )
return { sllhd: ret, "muhat": 0.0, "sigma_mu": self.sigma_mu }
muhat = 0.
        if expected is False:
xa = -self.expectedUpperLimitOnMu
xb = self.expectedUpperLimitOnMu
muhat = self._findMuhat(xa,xb)
        ret = self._computeLlhd(mu, muhat, return_nll = return_nll )
return { sllhd: ret, "muhat": muhat, "sigma_mu": self.sigma_mu }
def _getSigmaMu( self ):
""" get the standard deviation sigma on the signal strength mu, given
an upper limit and a central value. assumes a truncated Gaussian likelihood
"""
# the expected scale, eq 3.24 in arXiv:1202.3415
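        # for muhat = 0 the truncated Gaussian is a half-Gaussian whose 95%
        # quantile sits at Phi^{-1}(0.975) ~ 1.96 standard deviations, hence
        # sigma_mu = expectedUpperLimitOnMu / 1.96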
sigma_mu = self.expectedUpperLimitOnMu / 1.96
#if self.newCorrectionType: # we could correct here
# sigma_mu = sigma_mu / ( 1. - self.corr/2.)
return sigma_mu
def _root_func( self, mu : float ):
""" the root of this one should determine muhat """
numerator = erf((self.upperLimitOnMu - mu) / self.denominator) + \
erf( mu / self.denominator)
denominator = 1.0 + erf(mu / self.denominator)
if denominator == 0. and np.isclose ( numerator, denominator ):
## 0 / 0 = 1 (in this case)
return 1. - self.cl
ret = numerator / denominator - self.cl
return ret
def _findMuhat( self, xa : float = 0.,
xb : Union[float,None] = None ):
""" find muhat, in [xa,xb]
:param xa: lower limit of initial bracket
:param xb: upper limit of initial bracket. if none, then max(ul,eul)
"""
if xa == None:
xa = 0.
if xb == None:
xb = max(self.upperLimitOnMu, self.expectedUpperLimitOnMu)
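        # expand the bracket until _root_func changes sign between xa and xb;
        # give up after 10 attempts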
c = 0
ra, rb = self._root_func(xa), self._root_func(xb)
while ra * rb > 0:
c += 1
if c > 10:
line = f"cannot find bracket for brent bracketing ul={self.upperLimitOnMu:.2f}, eul={self.expectedUpperLimitOnMu:.2f},r({xa:.2f})={self._root_func(xa):.2f}, r({xb:.2f})={self._root_func(xb):.2f}"
logger.error( line )
raise ValueError ( line )
            while ra < 0.:
                xa = xa - 1.5 * abs(xa) - 0.3
                ra = self._root_func ( xa )
            while rb > 0.:
                xb = xb + 1.5 * abs(xb) + 0.3
                rb = self._root_func ( xb )
try:
muhat = optimize.toms748(self._root_func, xa, xb, rtol=1e-07, xtol=1e-07)
except ValueError as e:
logger.error ( f"truncated gaussian got ValueError {e}: rf({xa:.2f})={self._root_func(xa):.2f}, rf({xb:.2f})={self._root_func(xb):.2f}" )
logger.error ( f"ul={self.upperLimitOnMu:.2f}, eul={self.expectedUpperLimitOnMu:.2f}" )
muhat = optimize.toms748(self._root_func, xa, xb, rtol=1e-02, xtol=1e-02)
logger.error ( f"retry with tol=1e-2 seemed to have worked" )
return muhat
def _computeLlhd( self, mu, muhat, return_nll):
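        """ compute the likelihood (or nll) at signal strength mu, for a
        Gaussian with mean muhat and width sigma_mu, truncated at zero
        :param mu: signal strength at which to evaluate; if None, evaluate at muhat
        :param muhat: maximum likelihood estimate of the signal strength
        :param return_nll: if True, return negative log likelihood
        """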
if mu is None:
mu = muhat
sigma_mu = self.sigma_mu
## we could also correct here
if self.newCorrectionType:
sigma_mu = self.sigma_mu / ( 1. - self.corr / 2. )
# need to account for the truncation!
# first compute how many sigmas left of center is 0.
Zprime = muhat / sigma_mu
# now compute the area of the truncated gaussian
A = stats.norm.cdf(Zprime)
if return_nll:
return np.log(A) - stats.norm.logpdf(mu, muhat, sigma_mu)
return float(stats.norm.pdf(mu, muhat, sigma_mu) / A)