Source code for experiment.txnameDataObj

# !/usr/bin/env python3

"""
.. module:: txnameDataObj
   :synopsis: Holds the classes to interpolate the data grid.

.. moduleauthor:: Andre Lessa <lessa.a.p@gmail.com>
.. moduleauthor:: Wolfgang Waltenberger <wolfgang.waltenberger@gmail.com>

"""

import sys

from smodels.base.smodelsLogging import logger
from smodels.experiment.exceptions import SModelSExperimentError as SModelSError
from smodels.tools.caching import _memoize
from scipy.linalg import svd, LinAlgError
import numpy as np
import math
from math import floor, log10


[docs]class TxNameData(object): """ Holds the pre-processed data for the Txname object. It is responsible for computing the PCA transformation and interpolating. Only handles pre-processed data (1D unitless arrays, with widths rescaled). """ _keep_values = False # keep the original values, only for debugging def __init__(self, x, y, txdataId, accept_errors_upto=.05): """ :param x: 2-D list of flat and unitless x-points (e.g. [ [mass1,mass2,mass3,mass4], ...]) :param y: 1-D list with y-values (upper limits or efficiencies) :param _accept_errors_upto: If None, do not allow extrapolations outside of convex hull. If float value given, allow that much relative uncertainty on the upper limit / efficiency when extrapolating outside convex hull. This method can be used to loosen the equal branches assumption. """ self._id = txdataId self._accept_errors_upto = accept_errors_upto self._V = None self.y_values = y[:] # Compute PCA transformation: self.computeV(x) if self._keep_values: self.origdata = x def __str__(self): """ a simple unique string identifier, mostly for _memoize """ return str(self._id)
[docs] def round_to_n(self, x, n): if x == 0.0: return x return round(x, int(-np.sign(x) * int(floor(log10(abs(x)))) + (n - 1)))
def __ne__(self, other): return not self.__eq__(other) def __eq__(self, other): if type(self) != type(other): return False return self._id == other._id
[docs] def PCAtransf(self, point): """ Transform a flat/unitless point with masses/widths to the PCA coordinate space. :param point: Flat and unitless mass/rescaled width point (e.g. [mass1,mass2,width1]). Its length should be equal to self.full_dimensionality. :return: 1D array in coordinate space """ # Transform to PCA coordinates (if rotMatrix and transVector are defined: transVector = self.delta_x # Translation vector rotMatrix = self._V # Rotation matrix point = np.array([point]) point = ((point - transVector)).tolist()[0] # Translate point = np.dot(point, rotMatrix) # Rotate return point
[docs] def inversePCAtransf(self, point): """ Transform a a flat 1D point from coordinate space to flat/unitless point with masses/rescaled widths. :param point: 1D array in coordinate space :return: Flat and unitless mass/rescaled width point (e.g. [mass1,mass2,width1]). """ if len(point) != self.full_dimensionality and len(point) != self.dimensionality: msgError = "Wrong point dimensions (%i)," % (len(point)) msgError += " it should be % i(reduced dimensions)" % self.dimensionality msgError += " or %i(full dimensionts)" % self.full_dimensionality logger.error(msgError) raise SModelSError(msgError) elif len(point) != self.full_dimensionality: pointFull = np.array(point[:]) pointFull = np.append(pointFull, [0.]*(self.full_dimensionality-len(point))) else: pointFull = np.array(point[:]) # Transform to PCA coordinates (if rotMatrix and transVector are defined: transVector = self.delta_x # Translation vector rotMatrix = self._V # Rotation matrix point = np.array(pointFull) point = np.dot(rotMatrix, point) # Rotate point = ((point + transVector)).tolist()[0] # Translate return point
[docs] @_memoize def getValueFor(self, point): """ Returns the UL or efficiency for the point. :param point: Flat and unitless mass/width point (e.g. [mass1,mass2,width1]). Its length should be equal to self.full_dimensionality. :return: Interpolated value for the grid (without units) """ # Transform point accordint to PCA transformation: point = self.PCAtransf(point) self.projected_value = self.interpolate(point[:self.dimensionality]) # Check if input point has larger dimensionality: dp = self.countNonZeros(point) if dp > self.dimensionality: # we have data in different dimensions if self._accept_errors_upto is None: return None logger.debug("attempting to interpolate outside of convex hull " "(d=%d,dp=%d,point=%s)" % (self.dimensionality, dp, str(point))) val = self._interpolateOutsideConvexHull(point) else: val = self._returnProjectedValue() return val
[docs] def interpolate(self, point, fill_value=np.nan): """ Returns the interpolated value for the point (in coordinates) :param point: Point in coordinate space (length = self.dimensionality) :return: Value for point without units """ tol = 1e-6 # tol = sys.float_info.epsilon * 1e10 simplex = self.tri.find_simplex(point, tol=tol) if simplex == -1: # not inside any simplex? return fill_value # Transformation matrix for the simplex: simplexTrans = np.take(self.tri.transform, simplex, axis=0) # Space dimension: d = simplexTrans.shape[-1] # Rotation and translation to baryocentric coordinates: delta_x = simplexTrans[d, :] rot = simplexTrans[:d, :] bary = np.dot(rot, point-delta_x) # Point coordinates in the baryocentric system # Weights for the vertices: wts = np.append(bary, 1. - bary.sum()) # Vertex indices: vertices = np.take(self.tri.simplices, simplex, axis=0) # Compute the value: values = np.array(self.y_values) ret = np.dot(np.take(values, vertices), wts) minXsec = min(np.take(values, vertices)) if ret < minXsec: logger.debug('Interpolation below simplex values. Will take the smallest simplex value.') ret = minXsec # Round to 6 significant digits to avoid # numerical instabilities ret = self.round_to_n(float(ret),6) return ret
def _estimateExtrapolationError(self, point): """ When projecting a point from full_dimensionality to self.dimensionality, we estimate the expected extrapolation error with the following strategy: we compute the gradient at point P, and let alpha be the distance between p and P. We then walk one step of length alpha in the direction of the greatest ascent, and the opposite direction. Whichever relative change is greater is reported as the expected extrapolation error. :param point: Point in coordinate space (length = self.full_dimensionality) """ # Make sure the point is a numpy array point = np.array(point) # # how far are we away from the "plane": distance alpha alpha = float(np.sqrt(np.dot(point[self.dimensionality:], point[self.dimensionality:]))) if alpha == 0.: # # no distance to the plane, so no extrapolation error return 0. # # the value of the grid at the point projected to the "plane" # # compute gradient gradient = [] for i in range(self.dimensionality): P2 = np.copy(point) P2[i] += alpha pv = self.interpolate(P2[:self.dimensionality]) g = float((pv - self.projected_value)/alpha) if math.isnan(g): # # if we cannot compute a gradient, we return nan return float("nan") gradient.append(g) # # normalize gradient C = float(np.sqrt(np.dot(gradient, gradient))) if C == 0.: # # zero gradient? we return 0. return 0. for i, grad in enumerate(gradient): gradient[i] = grad/C*alpha # # walk one alpha along gradient P3 = np.copy(point) P4 = np.copy(point) for grad in gradient: P3[i] += grad P4[i] -= grad agp = self.interpolate(P3[:self.dimensionality]) agm = self.interpolate(P4[:self.dimensionality]) dep, dem = 0., 0. if self.projected_value == 0.: if agp != 0.: dep = 1.0 if agm != 0.: dem = 1.0 else: dep = abs(agp - self.projected_value)/self.projected_value dem = abs(agm - self.projected_value)/self.projected_value de = dep if dem > de: de = dem return de def _interpolateOutsideConvexHull(self, point): """ Experimental routine, meant to check if we can interpolate outside convex hull :param point: Point in coordinate space (length = self.full_dimensionality) """ # Make sure the point is a numpy array point = np.array(point) de = self._estimateExtrapolationError(point) if de < self._accept_errors_upto: return self._returnProjectedValue() if not math.isnan(de): logger.debug("Expected propagation error of %f too large to " "propagate." % de) return None def _returnProjectedValue(self): """ Return interpolation result with the appropriate units. """ # # None is returned without units' if self.projected_value is None or math.isnan(self.projected_value): logger.debug("Projected value is None. Projected point not in convex hull?") return None # Set value to zero if it is lower than machine precision (avoids fake negative values) if abs(self.projected_value) < 100.*sys.float_info.epsilon: self.projected_value = 0. return self.projected_value
[docs] def countNonZeros(self, mp): """ count the nonzeros in a vector """ nz = 0 lim = 10**-4 for i in mp: if abs(i) > lim: nz += 1 return nz
[docs] def onlyZeroValues(self): """ check if the map is zeroes only """ eps = sys.float_info.epsilon negative_values = bool(sum([x < -eps for x in self.y_values])) if negative_values: for x in self.y_values: if x < -eps: logger.error("negative error in result: %f, %s" % (x, self._id)) sys.exit() if sum(self.y_values) > 0.: return False return True
[docs] def computeV(self, x): """ Compute rotation matrix _V, and triangulation self.tri :parameter x: 2-D array with the flatten x-points without units (e.g. [ [mass1,mass2,mass3,mass4], [mass1',mass2',mass3',mass4'], ...]) """ if self._V is not None: return # Convert nested mass arrays (with width tuples) to coordinates # (remove entries in mass corresponding to inclusive values, # select the required widths and combine masses and widths # in a flat array where the widths are the last entries) Morig = x[:] aM = np.array(Morig, dtype=object) MT = aM.T.tolist() self.delta_x = np.array([[sum(x)/len(Morig) for x in MT]]) M = [] for Mx in Morig: m = (np.array([Mx]) - self.delta_x).tolist()[0] M.append(m) try: # # we dont need thousands of points for SVD n = int(math.ceil(len(M)/2000.)) Vt = svd(M[::n])[2] except LinAlgError as e: raise SModelSError("exception caught when performing singular value decomposition: %s, %s" % (type(e), e)) V = Vt.T self._V = V # self.round ( V ) Mp = [] # # the dimensionality of the whole mass space, disrespecting equal branches # # assumption self.full_dimensionality = len(Morig[0]) self.dimensionality = 0 for m in M: mp = np.dot(m, V) Mp.append(mp) nz = self.countNonZeros(mp) if nz > self.dimensionality: self.dimensionality = nz MpCut = [] for i in Mp: MpCut.append(i[:self.dimensionality].tolist()) if self.dimensionality > 1: try: from scipy.spatial import Delaunay except (ImportError, ModuleNotFoundError): from scipy.spatial.qhull import Delaunay self.tri = Delaunay(MpCut) else: self.tri = Delaunay1D(MpCut)
[docs]class Delaunay1D: """ Uses a 1D data array to interpolate the data. The attribute simplices is a list of N-1 pair of ints with the indices of the points forming the simplices (e.g. [[0,1],[1,2],[3,4],...]). """ def __init__(self, data): self.points = None self.simplices = None self.transform = None if data and self.checkData(data): self.points = sorted(data) # Create simplices as the point intervals (using the sorted data) self.simplices = np.array([[data.index(self.points[i+1]), data.index(pt)] for i, pt in enumerate(self.points[:-1])]) transform = [] # Create trivial transformation to the baryocentric coordinates: for simplex in self.simplices: xmax, xmin = data[simplex[0]][0], data[simplex[1]][0] transform.append([[1./(xmax-xmin)], [xmin]]) self.transform = np.array(transform) # Store convex hull (first and last point): self.convex_hull = np.array([data.index(self.points[0]), data.index(self.points[-1])]) else: raise SModelSError()
[docs] def find_simplex(self, x, tol=0.): """ Find 1D data interval (simplex) to which x belongs :param x: Point (float) without units :param tol: Tolerance. If x is outside the data range with distance < tol, extrapolate. :return: simplex index (int) """ xi = self.find_index(self.points, x) if xi == -1: if abs(x-self.points[0]) < tol: return 0 else: return -1 elif xi == len(self.simplices): if abs(x-self.points[-1]) < tol: return xi-1 else: return -1 else: return xi
[docs] def checkData(self, data): """ Define the simplices according to data. Compute and store the transformation matrix and simplices self.point. """ if not isinstance(data, list): logger.error("Input data for 1D Delaunay should be a list.") return False for pt in data: if (not isinstance(pt, list)) or len(pt) != 1 or (not isinstance(pt[0], float)): logger.error("Input data for 1D Delaunay is in wrong format. It should be [[x1],[x2],..]") return False return True
[docs] def find_index(self, xlist, x): """ Efficient way to find x in a list. Returns the index (i) of xlist such that xlist[i] < x <= xlist[i+1]. If x > max(xlist), returns the length of the list. If x < min(xlist), returns 0. vertices = np.take(self.tri.simplices, simplex, axis=0) temp = np.take(self.tri.transform, simplex, axis=0) d=temp.shape[2] delta = uvw - temp[:, d] :param xlist: List of x-type objects :param x: object to be searched for. :return: Index of the list such that xlist[i] < x <= xlist[i+1]. """ lo = 0 hi = len(xlist) while lo < hi: mid = (lo+hi)//2 if xlist[mid] < x: lo = mid+1 else: hi = mid return lo-1