Source code for experiment.txnameDataObj

# !/usr/bin/env python3

"""
.. module:: txnameDataObj
   :synopsis: Holds the classes to interpolate the data grid.

.. moduleauthor:: Andre Lessa <lessa.a.p@gmail.com>
.. moduleauthor:: Wolfgang Waltenberger <wolfgang.waltenberger@gmail.com>

"""

import sys

from smodels.base.smodelsLogging import logger
from smodels.experiment.exceptions import SModelSExperimentError as SModelSError
from smodels.tools.caching import _memoize
from scipy.linalg import svd, LinAlgError
import numpy as np
import math
from math import floor, log10


[docs]class TxNameData(object):
    """
    Holds the pre-processed data for the Txname object.
    It is responsible for computing the PCA transformation and interpolating.
    Only handles pre-processed data (1D unitless arrays, with widths rescaled).
    """
    _keep_values = False  # keep the original values, only for debugging

    def __init__(self, x, y, txdataId,
                 accept_errors_upto=.05):
        """
        :param x: 2-D list of flat and unitless x-points (e.g. [ [mass1,mass2,mass3,mass4], ...])
        :param y: 1-D list with y-values (upper limits or efficiencies)
        :param _accept_errors_upto: If None, do not allow extrapolations outside of
                convex hull.  If float value given, allow that much relative
                uncertainty on the upper limit / efficiency
                when extrapolating outside convex hull.
                This method can be used to loosen the equal branches assumption.


        """
        self._id = txdataId
        self._accept_errors_upto = accept_errors_upto
        self._V = None
        self.y_values = y[:]
        # Compute PCA transformation:
        self.computeV(x)
        if self._keep_values:
            self.origdata = x

    def __str__(self):
        """ a simple unique string identifier, mostly for _memoize """
        return str(self._id)

[docs]    def round_to_n(self, x, n):
        if x == 0.0:
            return x
        return round(x, int(-np.sign(x) * int(floor(log10(abs(x)))) + (n - 1)))

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return self._id == other._id

[docs]    def PCAtransf(self, point):
        """
        Transform a flat/unitless point with masses/widths to the PCA
        coordinate space.

        :param point: Flat and unitless mass/rescaled width point (e.g. [mass1,mass2,width1]).
                      Its length should be equal to self.full_dimensionality.

        :return: 1D array in coordinate space

        """

        # Transform to PCA coordinates (if rotMatrix and transVector are defined:
        transVector = self.delta_x  # Translation vector
        rotMatrix = self._V  # Rotation matrix

        point = np.array([point])
        point = ((point - transVector)).tolist()[0]  # Translate
        point = np.dot(point, rotMatrix)  # Rotate

        return point

[docs]    def inversePCAtransf(self, point):
        """
        Transform a a flat 1D point from coordinate space to flat/unitless point
        with masses/rescaled widths.

        :param point: 1D array in coordinate space

        :return: Flat and unitless mass/rescaled width point (e.g. [mass1,mass2,width1]).

        """

        if len(point) != self.full_dimensionality and len(point) != self.dimensionality:
            msgError = "Wrong point dimensions (%i)," % (len(point))
            msgError += " it should be % i(reduced dimensions)" % self.dimensionality
            msgError += " or %i(full dimensionts)" % self.full_dimensionality
            logger.error(msgError)
            raise SModelSError(msgError)

        elif len(point) != self.full_dimensionality:
            pointFull = np.array(point[:])
            pointFull = np.append(pointFull, [0.]*(self.full_dimensionality-len(point)))
        else:
            pointFull = np.array(point[:])

        # Transform to PCA coordinates (if rotMatrix and transVector are defined:
        transVector = self.delta_x  # Translation vector
        rotMatrix = self._V  # Rotation matrix

        point = np.array(pointFull)
        point = np.dot(rotMatrix, point)  # Rotate
        point = ((point + transVector)).tolist()[0]  # Translate

        return point

[docs]    @_memoize
    def getValueFor(self, point):
        """
        Returns the UL or efficiency for the point.

        :param point: Flat and unitless mass/width point (e.g. [mass1,mass2,width1]).
                      Its length should be equal to self.full_dimensionality.

        :return: Interpolated value for the grid (without units)
        """

        # Transform point accordint to PCA transformation:
        point = self.PCAtransf(point)

        self.projected_value = self.interpolate(point[:self.dimensionality])

        # Check if input point has larger dimensionality:
        dp = self.countNonZeros(point)
        if dp > self.dimensionality:  # we have data in different dimensions
            if self._accept_errors_upto is None:
                return None
            logger.debug("attempting to interpolate outside of convex hull "
                         "(d=%d,dp=%d,point=%s)" %
                         (self.dimensionality, dp, str(point)))
            val = self._interpolateOutsideConvexHull(point)
        else:
            val = self._returnProjectedValue()
        return val

[docs]    def interpolate(self, point, fill_value=np.nan):
        """
        Returns the interpolated value for the point (in coordinates)

        :param point: Point in coordinate space (length = self.dimensionality)

        :return: Value for point without units
        """

        tol = 1e-6
        #  tol = sys.float_info.epsilon * 1e10
        simplex = self.tri.find_simplex(point, tol=tol)
        if simplex == -1:  # not inside any simplex?
            return fill_value

        # Transformation matrix for the simplex:
        simplexTrans = np.take(self.tri.transform, simplex, axis=0)
        # Space dimension:
        d = simplexTrans.shape[-1]
        # Rotation and translation to baryocentric coordinates:
        delta_x = simplexTrans[d, :]
        rot = simplexTrans[:d, :]
        bary = np.dot(rot, point-delta_x)  # Point coordinates in the baryocentric system
        # Weights for the vertices:
        wts = np.append(bary, 1. - bary.sum())
        # Vertex indices:
        vertices = np.take(self.tri.simplices, simplex, axis=0)
        # Compute the value:
        values = np.array(self.y_values)
        ret = np.dot(np.take(values, vertices), wts)
        minXsec = min(np.take(values, vertices))
        if ret < minXsec:
            logger.debug('Interpolation below simplex values. Will take the smallest simplex value.')
            ret = minXsec
        # Round to 6 significant digits to avoid
        # numerical instabilities
        ret = self.round_to_n(float(ret),6)
        return ret

    def _estimateExtrapolationError(self, point):
        """
        When projecting a point from full_dimensionality to self.dimensionality, we
        estimate the expected extrapolation error with the following
        strategy: we compute the gradient at point P, and let alpha be the
        distance between p and P. We then walk one step of length alpha in
        the direction of the greatest ascent, and the opposite direction.
        Whichever relative change is greater is reported as the expected
        extrapolation error.

        :param point: Point in coordinate space (length = self.full_dimensionality)
        """

        # Make sure the point is a numpy array
        point = np.array(point)
        # #  how far are we away from the "plane": distance alpha
        alpha = float(np.sqrt(np.dot(point[self.dimensionality:],
                                     point[self.dimensionality:])))
        if alpha == 0.:
            # #  no distance to the plane, so no extrapolation error
            return 0.
        # #  the value of the grid at the point projected to the "plane"

        # #  compute gradient
        gradient = []
        for i in range(self.dimensionality):
            P2 = np.copy(point)
            P2[i] += alpha
            pv = self.interpolate(P2[:self.dimensionality])
            g = float((pv - self.projected_value)/alpha)
            if math.isnan(g):
                # #  if we cannot compute a gradient, we return nan
                return float("nan")
            gradient.append(g)
        # #  normalize gradient
        C = float(np.sqrt(np.dot(gradient, gradient)))
        if C == 0.:
            # #  zero gradient? we return 0.
            return 0.
        for i, grad in enumerate(gradient):
            gradient[i] = grad/C*alpha
        # #  walk one alpha along gradient
        P3 = np.copy(point)
        P4 = np.copy(point)
        for grad in gradient:
            P3[i] += grad
            P4[i] -= grad
        agp = self.interpolate(P3[:self.dimensionality])
        agm = self.interpolate(P4[:self.dimensionality])
        dep, dem = 0., 0.
        if self.projected_value == 0.:
            if agp != 0.:
                dep = 1.0
            if agm != 0.:
                dem = 1.0
        else:
            dep = abs(agp - self.projected_value)/self.projected_value
            dem = abs(agm - self.projected_value)/self.projected_value
        de = dep
        if dem > de:
            de = dem
        return de

    def _interpolateOutsideConvexHull(self, point):
        """
        Experimental routine, meant to check if we can interpolate outside
        convex hull

        :param point: Point in coordinate space (length = self.full_dimensionality)
        """

        # Make sure the point is a numpy array
        point = np.array(point)
        de = self._estimateExtrapolationError(point)

        if de < self._accept_errors_upto:
            return self._returnProjectedValue()

        if not math.isnan(de):
            logger.debug("Expected propagation error of %f too large to "
                         "propagate." % de)
        return None

    def _returnProjectedValue(self):
        """
        Return interpolation result with the appropriate units.
        """

        # #  None is returned without units'
        if self.projected_value is None or math.isnan(self.projected_value):
            logger.debug("Projected value is None. Projected point not in convex hull?")
            return None

        # Set value to zero if it is lower than machine precision (avoids fake negative values)
        if abs(self.projected_value) < 100.*sys.float_info.epsilon:
            self.projected_value = 0.

        return self.projected_value

[docs]    def countNonZeros(self, mp):
        """ count the nonzeros in a vector """
        nz = 0
        lim = 10**-4
        for i in mp:
            if abs(i) > lim:
                nz += 1
        return nz

[docs]    def onlyZeroValues(self):
        """ check if the map is zeroes only """
        eps = sys.float_info.epsilon
        negative_values = bool(sum([x < -eps for x in self.y_values]))
        if negative_values:
            for x in self.y_values:
                if x < -eps:
                    logger.error("negative error in result: %f, %s" %
                                 (x, self._id))
                    sys.exit()
        if sum(self.y_values) > 0.:
            return False
        return True

[docs]    def computeV(self, x):
        """
        Compute rotation matrix _V, and triangulation self.tri

        :parameter x: 2-D array with the flatten x-points without units
                      (e.g. [ [mass1,mass2,mass3,mass4], [mass1',mass2',mass3',mass4'], ...])

        """

        if self._V is not None:
            return

        # Convert nested mass arrays (with width tuples) to coordinates
        # (remove entries in mass corresponding to inclusive values,
        # select the required widths and combine masses and widths
        # in a flat array where the widths are the last entries)
        Morig = x[:]

        aM = np.array(Morig, dtype=object)
        MT = aM.T.tolist()
        self.delta_x = np.array([[sum(x)/len(Morig) for x in MT]])
        M = []

        for Mx in Morig:
            m = (np.array([Mx]) - self.delta_x).tolist()[0]
            M.append(m)

        try:
            # #  we dont need thousands of points for SVD
            n = int(math.ceil(len(M)/2000.))
            Vt = svd(M[::n])[2]
        except LinAlgError as e:
            raise SModelSError("exception caught when performing singular value decomposition: %s, %s" % (type(e), e))

        V = Vt.T
        self._V = V  # self.round ( V )
        Mp = []

        # #  the dimensionality of the whole mass space, disrespecting equal branches
        # #  assumption
        self.full_dimensionality = len(Morig[0])
        self.dimensionality = 0
        for m in M:
            mp = np.dot(m, V)
            Mp.append(mp)
            nz = self.countNonZeros(mp)
            if nz > self.dimensionality:
                self.dimensionality = nz
        MpCut = []
        for i in Mp:
            MpCut.append(i[:self.dimensionality].tolist())

        if self.dimensionality > 1:
            try:
                from scipy.spatial import Delaunay
            except (ImportError, ModuleNotFoundError):
                from scipy.spatial.qhull import Delaunay
            self.tri = Delaunay(MpCut)
        else:
            self.tri = Delaunay1D(MpCut)


[docs]class Delaunay1D:
    """
    Uses a 1D data array to interpolate the data.
    The attribute simplices is a list of N-1 pair of ints with the indices of the points
    forming the simplices (e.g. [[0,1],[1,2],[3,4],...]).
    """

    def __init__(self, data):

        self.points = None
        self.simplices = None
        self.transform = None
        if data and self.checkData(data):
            self.points = sorted(data)
            # Create simplices as the point intervals (using the sorted data)
            self.simplices = np.array([[data.index(self.points[i+1]), data.index(pt)]
                                       for i, pt in enumerate(self.points[:-1])])
            transform = []
            # Create trivial transformation to the baryocentric coordinates:
            for simplex in self.simplices:
                xmax, xmin = data[simplex[0]][0], data[simplex[1]][0]
                transform.append([[1./(xmax-xmin)], [xmin]])
            self.transform = np.array(transform)

            # Store convex hull (first and last point):
            self.convex_hull = np.array([data.index(self.points[0]), data.index(self.points[-1])])

        else:
            raise SModelSError()

[docs]    def find_simplex(self, x, tol=0.):
        """
        Find 1D data interval (simplex) to which x belongs

        :param x: Point (float) without units
        :param tol: Tolerance. If x is outside the data range with distance < tol, extrapolate.

        :return: simplex index (int)
        """

        xi = self.find_index(self.points, x)
        if xi == -1:
            if abs(x-self.points[0]) < tol:
                return 0
            else:
                return -1
        elif xi == len(self.simplices):
            if abs(x-self.points[-1]) < tol:
                return xi-1
            else:
                return -1
        else:
            return xi

[docs]    def checkData(self, data):
        """
        Define the simplices according to data. Compute and store
        the transformation matrix and simplices self.point.
        """
        if not isinstance(data, list):
            logger.error("Input data for 1D Delaunay should be a list.")
            return False
        for pt in data:
            if (not isinstance(pt, list)) or len(pt) != 1 or (not isinstance(pt[0], float)):
                logger.error("Input data for 1D Delaunay is in wrong format. It should be [[x1],[x2],..]")
                return False
        return True

[docs]    def find_index(self, xlist, x):
        """
        Efficient way to find x in a list.
        Returns the index (i) of xlist such that xlist[i] < x <= xlist[i+1].
        If x > max(xlist), returns the length of the list.
        If x < min(xlist), returns 0.        vertices = np.take(self.tri.simplices, simplex, axis=0)
        temp = np.take(self.tri.transform, simplex, axis=0)
        d=temp.shape[2]
        delta = uvw - temp[:, d]


        :param xlist: List of x-type objects
        :param x: object to be searched for.

        :return: Index of the list such that xlist[i] < x <= xlist[i+1].
        """

        lo = 0
        hi = len(xlist)
        while lo < hi:
            mid = (lo+hi)//2
            if xlist[mid] < x:
                lo = mid+1
            else:
                hi = mid
        return lo-1